From d9938ae897c8515a6b831b2799fbd568465f3648 Mon Sep 17 00:00:00 2001 From: Dharan Aditya Date: Tue, 27 Aug 2024 15:46:15 +0530 Subject: [PATCH 01/16] impl map_keys --- datafusion/common/src/utils/mod.rs | 17 +-- datafusion/functions-nested/src/lib.rs | 3 + .../functions-nested/src/map_extract.rs | 3 +- datafusion/functions-nested/src/map_keys.rs | 108 ++++++++++++++++++ datafusion/functions-nested/src/utils.rs | 18 ++- 5 files changed, 130 insertions(+), 19 deletions(-) create mode 100644 datafusion/functions-nested/src/map_keys.rs diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index 839f890bf077..418ea380bc2c 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -35,7 +35,7 @@ use arrow_array::{ Array, FixedSizeListArray, LargeListArray, ListArray, OffsetSizeTrait, RecordBatchOptions, }; -use arrow_schema::{DataType, Fields}; +use arrow_schema::DataType; use sqlparser::ast::Ident; use sqlparser::dialect::GenericDialect; use sqlparser::parser::Parser; @@ -754,21 +754,6 @@ pub fn combine_limit( (combined_skip, combined_fetch) } -pub fn get_map_entry_field(data_type: &DataType) -> Result<&Fields> { - match data_type { - DataType::Map(field, _) => { - let field_data_type = field.data_type(); - match field_data_type { - DataType::Struct(fields) => Ok(fields), - _ => { - _internal_err!("Expected a Struct type, got {:?}", field_data_type) - } - } - } - _ => _internal_err!("Expected a Map type, got {:?}", data_type), - } -} - #[cfg(test)] mod tests { use crate::ScalarValue::Null; diff --git a/datafusion/functions-nested/src/lib.rs b/datafusion/functions-nested/src/lib.rs index cc0a7b55cf86..8c820e29c454 100644 --- a/datafusion/functions-nested/src/lib.rs +++ b/datafusion/functions-nested/src/lib.rs @@ -43,6 +43,7 @@ pub mod length; pub mod make_array; pub mod map; pub mod map_extract; +mod map_keys; pub mod planner; pub mod position; pub mod range; @@ -83,6 +84,7 @@ pub mod expr_fn { pub use 
super::length::array_length; pub use super::make_array::make_array; pub use super::map_extract::map_extract; + pub use super::map_keys::map_keys; pub use super::position::array_position; pub use super::position::array_positions; pub use super::range::gen_series; @@ -146,6 +148,7 @@ pub fn all_default_nested_functions() -> Vec> { replace::array_replace_udf(), map::map_udf(), map_extract::map_extract_udf(), + map_keys::map_keys_udf(), ] } diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs index 82f0d8d6c15e..9f0c4ad29c60 100644 --- a/datafusion/functions-nested/src/map_extract.rs +++ b/datafusion/functions-nested/src/map_extract.rs @@ -24,7 +24,6 @@ use arrow::datatypes::DataType; use arrow_array::{Array, MapArray}; use arrow_buffer::OffsetBuffer; use arrow_schema::Field; -use datafusion_common::utils::get_map_entry_field; use datafusion_common::{cast::as_map_array, exec_err, Result}; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; @@ -32,7 +31,7 @@ use std::any::Any; use std::sync::Arc; use std::vec; -use crate::utils::make_scalar_function; +use crate::utils::{get_map_entry_field, make_scalar_function}; // Create static instances of ScalarUDFs for each function make_udf_expr_and_func!( diff --git a/datafusion/functions-nested/src/map_keys.rs b/datafusion/functions-nested/src/map_keys.rs new file mode 100644 index 000000000000..45c57f51b6d6 --- /dev/null +++ b/datafusion/functions-nested/src/map_keys.rs @@ -0,0 +1,108 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! [`ScalarUDFImpl`] definitions for map_keys function. + +use crate::utils::{get_map_entry_field, make_scalar_function}; +use arrow_array::{Array, ArrayRef, ListArray}; +use arrow_schema::{DataType, Field}; +use datafusion_common::{cast::as_map_array, exec_err, Result}; +use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use std::any::Any; +use std::sync::Arc; + +make_udf_expr_and_func!( + MapKeysFunc, + map_keys, + map, + "Return a list of all keys in the map.", + map_keys_udf +); + +#[derive(Debug)] +pub(crate) struct MapKeysFunc { + signature: Signature, +} + +impl MapKeysFunc { + pub fn new() -> Self { + Self { + signature: Signature::user_defined(Volatility::Immutable), + } + } +} + +impl ScalarUDFImpl for MapKeysFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "map_keys" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { + if arg_types.len() != 1 { + return exec_err!("map_keys expects single argument"); + } + let map_type = &arg_types[0]; + let map_fields = get_map_entry_field(map_type)?; + Ok(DataType::List(Arc::new(Field::new( + "keys", + map_fields.first().unwrap().data_type().to_owned(), + false, + )))) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { + make_scalar_function(map_keys_inner)(args) + } + + fn coerce_types( + &self, + arg_types: &[DataType], + ) -> datafusion_common::Result> { + if arg_types.len() != 1 { + return exec_err!("map_keys 
expects single argument"); + } + Ok(vec![arg_types[0].to_owned()]) + } +} + +fn map_keys_inner(args: &[ArrayRef]) -> Result { + if args.len() != 1 { + return exec_err!("map_keys expects single argument"); + } + + let map_array = match args[0].data_type() { + DataType::Map(_, _) => as_map_array(&args[0])?, + _ => return exec_err!("Argument for map_extract should be a map"), + }; + + Ok(Arc::new( + ListArray::new( + Arc::new(Field::new("keys", map_array.key_type().clone(), false)), + map_array.offsets().clone(), + Arc::clone(map_array.keys()), + None + ) + )) +} diff --git a/datafusion/functions-nested/src/utils.rs b/datafusion/functions-nested/src/utils.rs index 688e1633e5cf..1fdcda30fc99 100644 --- a/datafusion/functions-nested/src/utils.rs +++ b/datafusion/functions-nested/src/utils.rs @@ -26,11 +26,12 @@ use arrow_array::{ UInt32Array, }; use arrow_buffer::OffsetBuffer; -use arrow_schema::Field; +use arrow_schema::{Field, Fields}; use datafusion_common::cast::{as_large_list_array, as_list_array}; use datafusion_common::{exec_err, plan_err, Result, ScalarValue}; use core::any::type_name; +use datafusion_common::error::_internal_err; use datafusion_common::DataFusionError; use datafusion_expr::{ColumnarValue, ScalarFunctionImplementation}; @@ -253,6 +254,21 @@ pub(crate) fn compute_array_dims( } } +pub(crate) fn get_map_entry_field(data_type: &DataType) -> Result<&Fields> { + match data_type { + DataType::Map(field, _) => { + let field_data_type = field.data_type(); + match field_data_type { + DataType::Struct(fields) => Ok(fields), + _ => { + _internal_err!("Expected a Struct type, got {:?}", field_data_type) + } + } + } + _ => _internal_err!("Expected a Map type, got {:?}", data_type), + } +} + #[cfg(test)] mod tests { use super::*; From a2f63a6de9e198701b6c7e4590e06446533b870a Mon Sep 17 00:00:00 2001 From: Dharan Aditya Date: Tue, 27 Aug 2024 15:56:00 +0530 Subject: [PATCH 02/16] rename field name --- datafusion/functions-nested/src/map_keys.rs | 16 
+++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/datafusion/functions-nested/src/map_keys.rs b/datafusion/functions-nested/src/map_keys.rs index 45c57f51b6d6..d2472c8f178d 100644 --- a/datafusion/functions-nested/src/map_keys.rs +++ b/datafusion/functions-nested/src/map_keys.rs @@ -66,7 +66,7 @@ impl ScalarUDFImpl for MapKeysFunc { let map_type = &arg_types[0]; let map_fields = get_map_entry_field(map_type)?; Ok(DataType::List(Arc::new(Field::new( - "keys", + "item", map_fields.first().unwrap().data_type().to_owned(), false, )))) @@ -97,12 +97,10 @@ fn map_keys_inner(args: &[ArrayRef]) -> Result { _ => return exec_err!("Argument for map_extract should be a map"), }; - Ok(Arc::new( - ListArray::new( - Arc::new(Field::new("keys", map_array.key_type().clone(), false)), - map_array.offsets().clone(), - Arc::clone(map_array.keys()), - None - ) - )) + Ok(Arc::new(ListArray::new( + Arc::new(Field::new("item", map_array.key_type().clone(), false)), + map_array.offsets().clone(), + Arc::clone(map_array.keys()), + None, + ))) } From 3e5d0996baa7cfbec9e333be70613d410a23470a Mon Sep 17 00:00:00 2001 From: Dharan Aditya Date: Tue, 27 Aug 2024 16:02:42 +0530 Subject: [PATCH 03/16] add logic tests --- datafusion/sqllogictest/test_files/map.slt | 38 +++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index 270e4beccc52..afc494880585 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -568,8 +568,44 @@ select map_extract(column1, 1), map_extract(column1, 5), map_extract(column1, 7) [] [[4, , 6]] [] [] [] [[1, , 3]] +# Tests for map_keys + +query ? +SELECT map_keys(MAP { 'a': 1, 2: 3 }); +---- +[a, 2] + +query ? +SELECT map_keys(MAP {'a':1, 'b':2, 'c':3 }) FROM t; +---- +[a, b, c] +[a, b, c] +[a, b, c] + +query ? 
+SELECT map_keys(Map{column1: column2, column3: column4}) FROM t; +---- +[a, k1] +[b, k3] +[d, k5] + +query ? +SELECT map_keys(map(column5, column6)) FROM t; +---- +[k1, k2] +[k3] +[k5] + +query ? +SELECT map_keys(map(column8, column9)) FROM t; +---- +[[1, 2, 3]] +[[4]] +[[1, 2]] + + statement ok drop table map_array_table_1; statement ok -drop table map_array_table_2; \ No newline at end of file +drop table map_array_table_2; From a2a3c29695ec1ee647b7de11cdad59c698ce5f7e Mon Sep 17 00:00:00 2001 From: Dharan Aditya Date: Tue, 27 Aug 2024 16:05:46 +0530 Subject: [PATCH 04/16] one more --- datafusion/sqllogictest/test_files/map.slt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index afc494880585..5fcd9bdeff80 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -603,6 +603,11 @@ SELECT map_keys(map(column8, column9)) FROM t; [[4]] [[1, 2]] +query ? 
+SELECT map_keys(Map{}); +---- +[] + statement ok drop table map_array_table_1; From 42cf99ea570436d84975139b5f297b8e88c99f43 Mon Sep 17 00:00:00 2001 From: Dharan Aditya Date: Tue, 27 Aug 2024 16:11:53 +0530 Subject: [PATCH 05/16] owned to clone --- datafusion/functions-nested/src/map_keys.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/functions-nested/src/map_keys.rs b/datafusion/functions-nested/src/map_keys.rs index d2472c8f178d..a0d072a82572 100644 --- a/datafusion/functions-nested/src/map_keys.rs +++ b/datafusion/functions-nested/src/map_keys.rs @@ -67,7 +67,7 @@ impl ScalarUDFImpl for MapKeysFunc { let map_fields = get_map_entry_field(map_type)?; Ok(DataType::List(Arc::new(Field::new( "item", - map_fields.first().unwrap().data_type().to_owned(), + map_fields.first().unwrap().data_type().clone(), false, )))) } @@ -83,7 +83,7 @@ impl ScalarUDFImpl for MapKeysFunc { if arg_types.len() != 1 { return exec_err!("map_keys expects single argument"); } - Ok(vec![arg_types[0].to_owned()]) + Ok(vec![arg_types[0].clone()]) } } From c2abe605198ab63bbd8ec9dd839c3249fe91eff0 Mon Sep 17 00:00:00 2001 From: Dharan Aditya Date: Tue, 27 Aug 2024 21:21:30 +0530 Subject: [PATCH 06/16] more tests --- datafusion/sqllogictest/test_files/map.slt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index 5fcd9bdeff80..e42adb5e47f6 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -608,6 +608,12 @@ SELECT map_keys(Map{}); ---- [] +query ? 
+SELECT map_keys(column1) from map_array_table_1; +---- +[1, 2, 3] +[4, 5, 6] +[7, 8, 9] statement ok drop table map_array_table_1; From e960b0f4d40fc3832dfdf7972cb784459d505049 Mon Sep 17 00:00:00 2001 From: Dharan Aditya Date: Wed, 28 Aug 2024 13:30:26 +0530 Subject: [PATCH 07/16] typo --- datafusion/functions-nested/src/map_keys.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/functions-nested/src/map_keys.rs b/datafusion/functions-nested/src/map_keys.rs index a0d072a82572..20daadb5002c 100644 --- a/datafusion/functions-nested/src/map_keys.rs +++ b/datafusion/functions-nested/src/map_keys.rs @@ -94,7 +94,7 @@ fn map_keys_inner(args: &[ArrayRef]) -> Result { let map_array = match args[0].data_type() { DataType::Map(_, _) => as_map_array(&args[0])?, - _ => return exec_err!("Argument for map_extract should be a map"), + _ => return exec_err!("Argument for map_keys should be a map"), }; Ok(Arc::new(ListArray::new( From f5a89512e4c359b047f3f5ba33981a14b56ae7d0 Mon Sep 17 00:00:00 2001 From: Dharan Aditya Date: Wed, 28 Aug 2024 13:31:30 +0530 Subject: [PATCH 08/16] impl --- datafusion/functions-nested/src/lib.rs | 3 + datafusion/functions-nested/src/map_values.rs | 106 ++++++++++++++++++ 2 files changed, 109 insertions(+) create mode 100644 datafusion/functions-nested/src/map_values.rs diff --git a/datafusion/functions-nested/src/lib.rs b/datafusion/functions-nested/src/lib.rs index 8c820e29c454..1c26b230d9ce 100644 --- a/datafusion/functions-nested/src/lib.rs +++ b/datafusion/functions-nested/src/lib.rs @@ -44,6 +44,7 @@ pub mod make_array; pub mod map; pub mod map_extract; mod map_keys; +mod map_values; pub mod planner; pub mod position; pub mod range; @@ -85,6 +86,7 @@ pub mod expr_fn { pub use super::make_array::make_array; pub use super::map_extract::map_extract; pub use super::map_keys::map_keys; + pub use super::map_values::map_values; pub use super::position::array_position; pub use super::position::array_positions; pub use 
super::range::gen_series; @@ -149,6 +151,7 @@ pub fn all_default_nested_functions() -> Vec> { map::map_udf(), map_extract::map_extract_udf(), map_keys::map_keys_udf(), + map_values::map_values_udf(), ] } diff --git a/datafusion/functions-nested/src/map_values.rs b/datafusion/functions-nested/src/map_values.rs new file mode 100644 index 000000000000..589ea6a28b3b --- /dev/null +++ b/datafusion/functions-nested/src/map_values.rs @@ -0,0 +1,106 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! [`ScalarUDFImpl`] definitions for map_keys function. 
+ +use crate::utils::{get_map_entry_field, make_scalar_function}; +use arrow_array::{Array, ArrayRef, ListArray}; +use arrow_schema::{DataType, Field}; +use datafusion_common::{cast::as_map_array, exec_err, Result}; +use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use std::any::Any; +use std::sync::Arc; + +make_udf_expr_and_func!( + MapValuesFunc, + map_values, + map, + "Return a list of all values in the map.", + map_values_udf +); + +#[derive(Debug)] +pub(crate) struct MapValuesFunc { + signature: Signature, +} + +impl MapValuesFunc { + pub fn new() -> Self { + Self { + signature: Signature::user_defined(Volatility::Immutable), + } + } +} + +impl ScalarUDFImpl for MapValuesFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "map_values" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { + if arg_types.len() != 1 { + return exec_err!("map_values expects single argument"); + } + let map_type = &arg_types[0]; + let map_fields = get_map_entry_field(map_type)?; + Ok(DataType::List(Arc::new(Field::new( + "item", + map_fields.last().unwrap().data_type().clone(), + true, + )))) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { + make_scalar_function(map_values_inner)(args) + } + + fn coerce_types( + &self, + arg_types: &[DataType], + ) -> datafusion_common::Result> { + if arg_types.len() != 1 { + return exec_err!("map_values expects single argument"); + } + Ok(vec![arg_types[0].clone()]) + } +} + +fn map_values_inner(args: &[ArrayRef]) -> Result { + if args.len() != 1 { + return exec_err!("map_keys expects single argument"); + } + + let map_array = match args[0].data_type() { + DataType::Map(_, _) => as_map_array(&args[0])?, + _ => return exec_err!("Argument for map_values should be a map"), + }; + + Ok(Arc::new(ListArray::new( + Arc::new(Field::new("item", map_array.value_type().clone(), 
true)), + map_array.offsets().clone(), + Arc::clone(map_array.values()), + None, + ))) +} From 656be9608699443a68395949b02d1bedd699d88d Mon Sep 17 00:00:00 2001 From: Dharan Aditya Date: Wed, 28 Aug 2024 13:35:47 +0530 Subject: [PATCH 09/16] add logic tests --- datafusion/sqllogictest/test_files/map.slt | 48 ++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index e42adb5e47f6..c66334c4de2a 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -615,6 +615,54 @@ SELECT map_keys(column1) from map_array_table_1; [4, 5, 6] [7, 8, 9] + +# Tests for map_values + +query ? +SELECT map_values(MAP { 'a': 1, 2: 3 }); +---- +[1, 3] + +query ? +SELECT map_values(MAP {'a':1, 'b':2, 'c':3 }) FROM t; +---- +[1, 2, 3] +[1, 2, 3] +[1, 2, 3] + +query ? +SELECT map_values(Map{column1: column2, column3: column4}) FROM t; +---- +[1, 10] +[2, 30] +[4, 50] + +query ? +SELECT map_values(map(column5, column6)) FROM t; +---- +[1, 2] +[3] +[5] + +query ? +SELECT map_values(map(column8, column9)) FROM t; +---- +[a] +[b] +[c] + +query ? +SELECT map_values(Map{}); +---- +[] + +query ? 
+SELECT map_values(column1) from map_array_table_1; +---- +[[1, , 3], [4, , 6], [7, 8, 9]] +[[1, , 3], [4, , 6], [7, 8, 9]] +[[1, , 3], [9, , 6], [7, 8, 9]] + statement ok drop table map_array_table_1; From c4582afc2e2ab870a4af39db28b1fa3b9e8145db Mon Sep 17 00:00:00 2001 From: Dharan Aditya Date: Wed, 28 Aug 2024 23:24:22 +0530 Subject: [PATCH 10/16] chore --- datafusion/functions-nested/src/lib.rs | 2 +- datafusion/functions-nested/src/utils.rs | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/datafusion/functions-nested/src/lib.rs b/datafusion/functions-nested/src/lib.rs index 8c820e29c454..c1c67533afdb 100644 --- a/datafusion/functions-nested/src/lib.rs +++ b/datafusion/functions-nested/src/lib.rs @@ -43,7 +43,7 @@ pub mod length; pub mod make_array; pub mod map; pub mod map_extract; -mod map_keys; +pub mod map_keys; pub mod planner; pub mod position; pub mod range; diff --git a/datafusion/functions-nested/src/utils.rs b/datafusion/functions-nested/src/utils.rs index 1fdcda30fc99..e4431a7f69e3 100644 --- a/datafusion/functions-nested/src/utils.rs +++ b/datafusion/functions-nested/src/utils.rs @@ -28,10 +28,9 @@ use arrow_array::{ use arrow_buffer::OffsetBuffer; use arrow_schema::{Field, Fields}; use datafusion_common::cast::{as_large_list_array, as_list_array}; -use datafusion_common::{exec_err, plan_err, Result, ScalarValue}; +use datafusion_common::{exec_err, internal_err, plan_err, Result, ScalarValue}; use core::any::type_name; -use datafusion_common::error::_internal_err; use datafusion_common::DataFusionError; use datafusion_expr::{ColumnarValue, ScalarFunctionImplementation}; @@ -261,11 +260,11 @@ pub(crate) fn get_map_entry_field(data_type: &DataType) -> Result<&Fields> { match field_data_type { DataType::Struct(fields) => Ok(fields), _ => { - _internal_err!("Expected a Struct type, got {:?}", field_data_type) + internal_err!("Expected a Struct type, got {:?}", field_data_type) } } } - _ => _internal_err!("Expected a Map type, 
got {:?}", data_type), + _ => internal_err!("Expected a Map type, got {:?}", data_type), } } From be13cd0794cf9d39ce6852f081c55dc357309c2d Mon Sep 17 00:00:00 2001 From: Dharan Aditya Date: Fri, 30 Aug 2024 01:52:51 +0530 Subject: [PATCH 11/16] add docs --- datafusion/functions-nested/src/map_values.rs | 2 +- .../source/user-guide/sql/scalar_functions.md | 54 +++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/datafusion/functions-nested/src/map_values.rs b/datafusion/functions-nested/src/map_values.rs index 589ea6a28b3b..57a92c166be8 100644 --- a/datafusion/functions-nested/src/map_values.rs +++ b/datafusion/functions-nested/src/map_values.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! [`ScalarUDFImpl`] definitions for map_keys function. +//! [`ScalarUDFImpl`] definitions for map_values function. use crate::utils::{get_map_entry_field, make_scalar_function}; use arrow_array::{Array, ArrayRef, ListArray}; diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index c7b3409ba7cd..118a08a9428e 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -3641,6 +3641,8 @@ Unwraps struct fields into columns. - [map](#map) - [make_map](#make_map) - [map_extract](#map_extract) +- [map_keys](#map_keys) +- [map_values](#map_values) ### `map` @@ -3729,6 +3731,58 @@ SELECT map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'a'); - element_at + +### `map_keys` + +Return a list of all keys in the map. + +``` +map_keys(map) +``` + +#### Arguments + +- `map`: Map expression. + Can be a constant, column, or function, and any combination of map operators. + +#### Example + +``` +SELECT map_keys(MAP {'a': 1, 'b': NULL, 'c': 3}); +---- +[a, b, c] + +select map_keys(map([100, 5], [42,43])); +---- +[100, 5] +``` + + +### `map_values` + +Return a list of all values in the map. 
+ +``` +map_values(map) +``` + +#### Arguments + +- `map`: Map expression. + Can be a constant, column, or function, and any combination of map operators. + +#### Example + +``` +SELECT map_values(MAP {'a': 1, 'b': NULL, 'c': 3}); +---- +[1, , 3] + +select map_keys(map([100, 5], [42,43])); +---- +[42, 43] +``` + ## Hashing Functions - [digest](#digest) From 44fac054f83ac914631f6d834705785995a97d19 Mon Sep 17 00:00:00 2001 From: Dharan Aditya Date: Fri, 30 Aug 2024 01:57:38 +0530 Subject: [PATCH 12/16] trying to make prettier happy --- docs/source/user-guide/sql/scalar_functions.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 118a08a9428e..456e9d3c7feb 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -3731,7 +3731,6 @@ SELECT map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'a'); - element_at - ### `map_keys` Return a list of all keys in the map. @@ -3757,7 +3756,6 @@ select map_keys(map([100, 5], [42,43])); [100, 5] ``` - ### `map_values` Return a list of all values in the map. 
From f645c7eaa734332e8b157abc1c9a58238bc322ca Mon Sep 17 00:00:00 2001 From: Dharan Aditya Date: Fri, 30 Aug 2024 09:09:43 +0530 Subject: [PATCH 13/16] Update scalar_functions.md Co-authored-by: Alex Huang --- docs/source/user-guide/sql/scalar_functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 456e9d3c7feb..f8602a102f3b 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -3776,7 +3776,7 @@ SELECT map_values(MAP {'a': 1, 'b': NULL, 'c': 3}); ---- [1, , 3] -select map_keys(map([100, 5], [42,43])); +select map_values(map([100, 5], [42,43])); ---- [42, 43] ``` From 0fd73137189879a8134fe83481f37979821506ab Mon Sep 17 00:00:00 2001 From: Dharan Aditya Date: Fri, 30 Aug 2024 22:47:12 +0530 Subject: [PATCH 14/16] refactor signature --- datafusion/functions-nested/src/map_keys.rs | 20 ++++++++----------- datafusion/functions-nested/src/map_values.rs | 20 ++++++++----------- 2 files changed, 16 insertions(+), 24 deletions(-) diff --git a/datafusion/functions-nested/src/map_keys.rs b/datafusion/functions-nested/src/map_keys.rs index 20daadb5002c..0b1cebb27c86 100644 --- a/datafusion/functions-nested/src/map_keys.rs +++ b/datafusion/functions-nested/src/map_keys.rs @@ -21,7 +21,10 @@ use crate::utils::{get_map_entry_field, make_scalar_function}; use arrow_array::{Array, ArrayRef, ListArray}; use arrow_schema::{DataType, Field}; use datafusion_common::{cast::as_map_array, exec_err, Result}; -use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use datafusion_expr::{ + ArrayFunctionSignature, ColumnarValue, ScalarUDFImpl, Signature, TypeSignature, + Volatility, +}; use std::any::Any; use std::sync::Arc; @@ -41,7 +44,10 @@ pub(crate) struct MapKeysFunc { impl MapKeysFunc { pub fn new() -> Self { Self { - signature: Signature::user_defined(Volatility::Immutable), + 
signature: Signature::new( + TypeSignature::ArraySignature(ArrayFunctionSignature::MapArray), + Volatility::Immutable, + ), } } } @@ -75,16 +81,6 @@ impl ScalarUDFImpl for MapKeysFunc { fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { make_scalar_function(map_keys_inner)(args) } - - fn coerce_types( - &self, - arg_types: &[DataType], - ) -> datafusion_common::Result> { - if arg_types.len() != 1 { - return exec_err!("map_keys expects single argument"); - } - Ok(vec![arg_types[0].clone()]) - } } fn map_keys_inner(args: &[ArrayRef]) -> Result { diff --git a/datafusion/functions-nested/src/map_values.rs b/datafusion/functions-nested/src/map_values.rs index 57a92c166be8..ba14f5a151b3 100644 --- a/datafusion/functions-nested/src/map_values.rs +++ b/datafusion/functions-nested/src/map_values.rs @@ -21,7 +21,10 @@ use crate::utils::{get_map_entry_field, make_scalar_function}; use arrow_array::{Array, ArrayRef, ListArray}; use arrow_schema::{DataType, Field}; use datafusion_common::{cast::as_map_array, exec_err, Result}; -use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use datafusion_expr::{ + ArrayFunctionSignature, ColumnarValue, ScalarUDFImpl, Signature, TypeSignature, + Volatility, +}; use std::any::Any; use std::sync::Arc; @@ -41,7 +44,10 @@ pub(crate) struct MapValuesFunc { impl MapValuesFunc { pub fn new() -> Self { Self { - signature: Signature::user_defined(Volatility::Immutable), + signature: Signature::new( + TypeSignature::ArraySignature(ArrayFunctionSignature::MapArray), + Volatility::Immutable, + ), } } } @@ -75,16 +81,6 @@ impl ScalarUDFImpl for MapValuesFunc { fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { make_scalar_function(map_values_inner)(args) } - - fn coerce_types( - &self, - arg_types: &[DataType], - ) -> datafusion_common::Result> { - if arg_types.len() != 1 { - return exec_err!("map_values expects single argument"); - } - Ok(vec![arg_types[0].clone()]) - } } fn 
map_values_inner(args: &[ArrayRef]) -> Result { From f15f7f261aef920352c3354428df8540a0cbb2b6 Mon Sep 17 00:00:00 2001 From: Dharan Aditya Date: Sat, 31 Aug 2024 14:23:07 +0530 Subject: [PATCH 15/16] format docs --- docs/source/user-guide/sql/scalar_functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index f8602a102f3b..5c383b1f5629 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -3749,7 +3749,7 @@ map_keys(map) ``` SELECT map_keys(MAP {'a': 1, 'b': NULL, 'c': 3}); ---- -[a, b, c] +[a, b, c] select map_keys(map([100, 5], [42,43])); ---- From 7d4696cff559f45a1f526f3bf54033edf2223d1e Mon Sep 17 00:00:00 2001 From: Dharan Aditya Date: Sat, 31 Aug 2024 15:12:10 +0530 Subject: [PATCH 16/16] Update map_values.rs Co-authored-by: Alex Huang --- datafusion/functions-nested/src/map_values.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/functions-nested/src/map_values.rs b/datafusion/functions-nested/src/map_values.rs index ba14f5a151b3..58c0d74eed5f 100644 --- a/datafusion/functions-nested/src/map_values.rs +++ b/datafusion/functions-nested/src/map_values.rs @@ -85,7 +85,7 @@ impl ScalarUDFImpl for MapValuesFunc { fn map_values_inner(args: &[ArrayRef]) -> Result { if args.len() != 1 { - return exec_err!("map_keys expects single argument"); + return exec_err!("map_values expects single argument"); } let map_array = match args[0].data_type() {