From 218ff7be82c84e43891211f0c45ca3b23e2faca5 Mon Sep 17 00:00:00 2001 From: Xin Li Date: Wed, 28 Aug 2024 22:27:57 +0800 Subject: [PATCH] Refactoring regexp_count --- Cargo.toml | 2 +- datafusion-cli/Cargo.toml | 2 +- datafusion-examples/examples/regex_count.rs | 33 + datafusion/core/Cargo.toml | 2 +- datafusion/functions/src/regex/regexpcount.rs | 639 +++++++++++------- datafusion/proto-common/Cargo.toml | 2 +- datafusion/proto-common/gen/Cargo.toml | 2 +- datafusion/proto/Cargo.toml | 2 +- datafusion/proto/gen/Cargo.toml | 2 +- datafusion/substrait/Cargo.toml | 2 +- 10 files changed, 418 insertions(+), 270 deletions(-) create mode 100644 datafusion-examples/examples/regex_count.rs diff --git a/Cargo.toml b/Cargo.toml index fb6545c5bc4c..ae344a46a1bd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -58,7 +58,7 @@ homepage = "https://datafusion.apache.org" license = "Apache-2.0" readme = "README.md" repository = "https://github.com/apache/datafusion" -rust-version = "1.80" +rust-version = "1.76" version = "41.0.0" [workspace.dependencies] diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 57a6c75dc6a2..0a4523a1c04e 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -26,7 +26,7 @@ license = "Apache-2.0" homepage = "https://datafusion.apache.org" repository = "https://github.com/apache/datafusion" # Specify MSRV here as `cargo msrv` doesn't support workspace version -rust-version = "1.80" +rust-version = "1.76" readme = "README.md" [dependencies] diff --git a/datafusion-examples/examples/regex_count.rs b/datafusion-examples/examples/regex_count.rs new file mode 100644 index 000000000000..93ec705ff6cc --- /dev/null +++ b/datafusion-examples/examples/regex_count.rs @@ -0,0 +1,33 @@ +use datafusion::common::Result; +use datafusion::prelude::{CsvReadOptions, SessionContext}; + +#[tokio::main] +async fn main() -> Result<()> { + let ctx = SessionContext::new(); + ctx.register_csv( + "examples", + "../../datafusion/physical-expr/tests/data/regex.csv", + CsvReadOptions::new(), + ) + .await?; + + // + // + //regexp_count examples + // + // + // regexp_count format is (regexp_count(text, regex[, flags]) + // + + // use sql and regexp_count function to test col 'values', against patterns in col 'patterns' without flags + let result = ctx + .sql("select regexp_count(values, patterns) from examples") + .await? + .collect() + .await?; + + println!("{:?}", result); + assert_eq!(result.len(), 1); + + Ok(()) +} diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 625c1067e495..adbba3eb31d6 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -30,7 +30,7 @@ authors = { workspace = true } # Specify MSRV here as `cargo msrv` doesn't support workspace version and fails with # "Unable to find key 'package.rust-version' (or 'package.metadata.msrv') in 'arrow-datafusion/Cargo.toml'" # https://github.com/foresterre/cargo-msrv/issues/590 -rust-version = "1.80" +rust-version = "1.76" [lints] workspace = true diff --git a/datafusion/functions/src/regex/regexpcount.rs b/datafusion/functions/src/regex/regexpcount.rs index 246d326fd0c8..2b7805c40915 100644 --- a/datafusion/functions/src/regex/regexpcount.rs +++ b/datafusion/functions/src/regex/regexpcount.rs @@ -15,26 +15,24 @@ // specific language governing permissions and limitations // under the License. -use std::iter::repeat; -use std::sync::Arc; - -use arrow::array::{ - Array, ArrayRef, AsArray, Datum, GenericStringArray, Int64Array, OffsetSizeTrait, - Scalar, +use arrow::array::{Array, ArrayRef, Int64Array, OffsetSizeTrait}; +use arrow::datatypes::DataType; +use arrow::datatypes::{ + DataType::Int64, DataType::LargeUtf8, DataType::Utf8, DataType::Utf8View, }; -use arrow::datatypes::DataType::{self, Int64, LargeUtf8, Utf8, Utf8View}; -use arrow::datatypes::Int64Type; use arrow::error::ArrowError; -use datafusion_common::cast::{as_generic_string_array, as_primitive_array}; -use datafusion_common::{ - arrow_err, exec_err, internal_err, DataFusionError, Result, ScalarValue, +use datafusion_common::cast::{as_generic_string_array, as_int64_array}; +use datafusion_common::{exec_err, internal_err, Result, ScalarValue}; +use datafusion_expr::{ + ColumnarValue, ScalarUDFImpl, Signature, TypeSignature::Exact, + TypeSignature::Uniform, Volatility, }; -use datafusion_expr::TypeSignature::{Exact, Uniform}; -use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; use itertools::izip; use regex::Regex; +use std::collections::hash_map::Entry; +use std::collections::HashMap; +use std::sync::Arc; -/// regexp_count(string, pattern [, start [, flags ]]) -> int #[derive(Debug)] pub struct RegexpCountFunc { signature: Signature, @@ -78,13 +76,8 @@ impl ScalarUDFImpl for RegexpCountFunc { &self.signature } - fn return_type(&self, arg_types: &[DataType]) -> Result { - use DataType::*; - - Ok(match &arg_types[0] { - Null => Null, - _ => Int64, - }) + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(Int64) } fn invoke(&self, args: &[datafusion_expr::ColumnarValue]) -> Result { @@ -115,250 +108,388 @@ impl ScalarUDFImpl for RegexpCountFunc { fn regexp_count_func(args: &[ArrayRef]) -> Result { match args[0].data_type() { - DataType::Utf8 => regexp_count::(args), - DataType::LargeUtf8 => regexp_count::(args), + Utf8 => regexp_count::(args), + LargeUtf8 => regexp_count::(args), other => { internal_err!("Unsupported data type {other:?} for function regexp_count") } } } +/// This function `regexp_count` is responsible for counting the occurrences of a regular expression pattern +/// within a string array. It supports optional start positions and flags for case insensitivity. +/// +/// The function accepts a variable number of arguments: +/// - `values`: The array of strings to search within. +/// - `regex_array`: The array of regular expression patterns to search for. +/// - `start_array` (optional): The array of start positions for the search. +/// - `flags_array` (optional): The array of flags to modify the search behavior (e.g., case insensitivity). +/// +/// The function handles different combinations of scalar and array inputs for the regex patterns, start positions, +/// and flags. It uses a cache to store compiled regular expressions for efficiency. +/// +/// # Errors +/// Returns an error if the input arrays have mismatched lengths or if the regular expression fails to compile. pub fn regexp_count(args: &[ArrayRef]) -> Result { - let arg_len = args.len(); + let args_len = args.len(); + if !(2..=4).contains(&args_len) { + return exec_err!("regexp_count was called with {args_len} arguments. It requires at least 2 and at most 4."); + } - match arg_len { - 2..=4 => { - let values = as_generic_string_array::(&args[0])?; - let regex = as_generic_string_array::(&args[1])?; + let values = as_generic_string_array::(&args[0])?; + let regex_array = as_generic_string_array::(&args[1])?; - if values.is_empty() || regex.is_empty() { - return Ok(Arc::new(Int64Array::new_null(0))); - } + let (regex_scalar, is_regex_scalar) = if regex_array.len() == 1 { + (Some(regex_array.value(0)), true) + } else { + (None, false) + }; - let regex_datum: &dyn Datum = if regex.len() != 1 { - regex - } else { - &Scalar::new(regex) - }; - let start_scalar: Scalar<&Int64Array>; - let start_array_datum: Option<&dyn Datum> = if arg_len > 2 { - let start_array = as_primitive_array::(&args[2])?; - if start_array.is_empty() { - None - } else if start_array.len() != 1 { - Some(start_array as &dyn Datum) - } else { - start_scalar = Scalar::new(start_array); - Some(&start_scalar as &dyn Datum) + let (start_array, start_scalar, is_start_scalar) = if args.len() > 2 { + let start = as_int64_array(&args[2])?; + if start.len() == 1 { + (None, Some(start.value(0)), true) + } else { + (Some(start), None, false) + } + } else { + (None, Some(1), true) + }; + + let (flags_array, flags_scalar, is_flags_scalar) = if args.len() > 3 { + let flags = as_generic_string_array::(&args[3])?; + if flags.len() == 1 { + (None, Some(flags.value(0)), true) + } else { + (Some(flags), None, false) + } + } else { + (None, None, true) + }; + + match (is_regex_scalar, is_start_scalar, is_flags_scalar) { + (true, true, true) => { + let regex = match regex_scalar { + None | Some("") => { + return Ok(Arc::new(Int64Array::from(vec![0; values.len()]))) } - } else { - None + Some(regex) => regex, }; - let flags_scalar: Scalar<&GenericStringArray>; - let flags_array_datum: Option<&dyn Datum> = if arg_len > 3 { - let flags_array = as_generic_string_array::(&args[3])?; - if flags_array.is_empty() { - None - } else if flags_array.len() != 1 { - Some(flags_array as &dyn Datum) - } else { - flags_scalar = Scalar::new(flags_array); - Some(&flags_scalar as &dyn Datum) + let pattern = compile_regex(regex, flags_scalar)?; + + Ok(Arc::new(Int64Array::from_iter_values( + values + .iter() + .map(|value| count_matches(value, &pattern, start_scalar)) + .collect::>>()?, + ))) + } + (true, true, false) => { + let regex = match regex_scalar { + None | Some("") => { + return Ok(Arc::new(Int64Array::from(vec![0; values.len()]))) } - } else { - None + Some(regex) => regex, }; - Ok(regexp_array_count( - values, - regex_datum, - start_array_datum, - flags_array_datum, - ) - .map(|x| Arc::new(x) as ArrayRef)?) - } - other => { - exec_err!( - "regexp_count was called with {other} arguments. It requires at least 2 and at most 4." - ) + let flags_array = flags_array.unwrap(); + if values.len() != flags_array.len() { + return exec_err!( + "flags_array must be the same length as values array; got {} and {}", + values.len(), + flags_array.len() + ); + } + + let mut regex_cache = HashMap::new(); + Ok(Arc::new(Int64Array::from_iter_values( + values + .iter() + .zip(flags_array.iter()) + .map(|(value, flags)| { + let pattern = + compile_and_cache_regex(regex, flags, &mut regex_cache)?; + count_matches(value, &pattern, start_scalar) + }) + .collect::>>()?, + ))) } - } -} + (true, false, true) => { + let regex = match regex_scalar { + None | Some("") => { + return Ok(Arc::new(Int64Array::from(vec![0; values.len()]))) + } + Some(regex) => regex, + }; -pub fn regexp_array_count( - array: &GenericStringArray, - regex_array: &dyn Datum, - start_array: Option<&dyn Datum>, - flags_array: Option<&dyn Datum>, -) -> Result { - let (rhs, is_rhs_scalar) = regex_array.get(); - - if array.data_type() != rhs.data_type() { - return arrow_err!(ArrowError::ComputeError( - "regexp_count() requires pattern to be either Utf8 or LargeUtf8".to_string(), - )); - } + let pattern = compile_regex(regex, flags_scalar)?; - if !is_rhs_scalar && array.len() != rhs.len() { - return arrow_err!( - ArrowError::ComputeError( - "regexp_count() requires pattern to be either a scalar or the same length as the input array".to_string(), - ) - ); - } + let start_array = start_array.unwrap(); - let (starts, is_starts_scalar) = match start_array { - Some(starts) => starts.get(), - None => (&Int64Array::from(vec![1]) as &dyn Array, true), - }; + Ok(Arc::new(Int64Array::from_iter_values( + values + .iter() + .zip(start_array.iter()) + .map(|(value, start)| count_matches(value, &pattern, start)) + .collect::>>()?, + ))) + } + (true, false, false) => { + let regex = match regex_scalar { + None | Some("") => { + return Ok(Arc::new(Int64Array::from(vec![0; values.len()]))) + } + Some(regex) => regex, + }; - if *starts.data_type() != Int64 { - return arrow_err!(ArrowError::ComputeError( - "regexp_count() requires start to be Int64".to_string(), - )); - } + let flags_array = flags_array.unwrap(); + if values.len() != flags_array.len() { + return exec_err!( + "flags_array must be the same length as values array; got {} and {}", + values.len(), + flags_array.len() + ); + } - if !is_starts_scalar && array.len() != starts.len() { - return arrow_err!( - ArrowError::ComputeError( - "regexp_count() requires start to be either a scalar or the same length as the input array".to_string(), - ) - ); - } + let mut regex_cache = HashMap::new(); + Ok(Arc::new(Int64Array::from_iter_values( + izip!( + values.iter(), + start_array.unwrap().iter(), + flags_array.iter() + ) + .map(|(value, start, flags)| { + let pattern = + compile_and_cache_regex(regex, flags, &mut regex_cache)?; + + count_matches(value, &pattern, start) + }) + .collect::>>()?, + ))) + } + (false, true, true) => { + if values.len() != regex_array.len() { + return exec_err!( + "regex_array must be the same length as values array; got {} and {}", + values.len(), + regex_array.len() + ); + } - let (flags, is_flags_scalar) = match flags_array { - Some(flags) => flags.get(), - None => ( - &GenericStringArray::::from(vec![None as Option<&str>]) as &dyn Array, - true, - ), - }; + let mut regex_cache = HashMap::new(); + Ok(Arc::new(Int64Array::from_iter_values( + values + .iter() + .zip(regex_array.iter()) + .map(|(value, regex)| { + let regex = match regex { + None | Some("") => return Ok(0), + Some(regex) => regex, + }; + + let pattern = compile_and_cache_regex( + regex, + flags_scalar, + &mut regex_cache, + )?; + count_matches(value, &pattern, start_scalar) + }) + .collect::>>()?, + ))) + } + (false, true, false) => { + if values.len() != regex_array.len() { + return exec_err!( + "regex_array must be the same length as values array; got {} and {}", + values.len(), + regex_array.len() + ); + } - if !is_flags_scalar && array.len() != flags.len() { - return arrow_err!( - ArrowError::ComputeError( - "regexp_count() requires flags to be either a scalar or the same length as the input array".to_string(), - ) - ); - } + let flags_array = flags_array.unwrap(); + if values.len() != flags_array.len() { + return exec_err!( + "flags_array must be the same length as values array; got {} and {}", + values.len(), + flags_array.len() + ); + } - let regex_iter: Box>> = if is_rhs_scalar { - let regex: &arrow::array::GenericByteArray< - arrow::datatypes::GenericStringType, - > = rhs.as_string::(); - let regex = regex.is_valid(0).then(|| regex.value(0)); - if regex.is_none() { - return Ok(Int64Array::from( - repeat(0).take(array.len()).collect::>(), - )); + let mut regex_cache = HashMap::new(); + Ok(Arc::new(Int64Array::from_iter_values( + izip!(values.iter(), regex_array.iter(), flags_array.iter()) + .map(|(value, regex, flags)| { + let regex = match regex { + None | Some("") => return Ok(0), + Some(regex) => regex, + }; + + let pattern = + compile_and_cache_regex(regex, flags, &mut regex_cache)?; + + count_matches(value, &pattern, start_scalar) + }) + .collect::>>()?, + ))) } + (false, false, true) => { + if values.len() != regex_array.len() { + return exec_err!( + "regex_array must be the same length as values array; got {} and {}", + values.len(), + regex_array.len() + ); + } - Box::new(repeat(regex)) - } else { - Box::new(rhs.as_string::().iter()) - }; + let start_array = start_array.unwrap(); + if values.len() != start_array.len() { + return exec_err!( + "start_array must be the same length as values array; got {} and {}", + values.len(), + start_array.len() + ); + } - let start_iter: Box> = if is_starts_scalar { - let start = starts.as_primitive::(); - let start = start.is_valid(0).then(|| start.value(0)); - Box::new(repeat(start.unwrap_or(1))) - } else { - Box::new( - starts - .as_primitive::() - .iter() - .map(|x| x.unwrap_or(1)), - ) - }; + let mut regex_cache = HashMap::new(); + Ok(Arc::new(Int64Array::from_iter_values( + izip!(values.iter(), regex_array.iter(), start_array.iter()) + .map(|(value, regex, start)| { + let regex = match regex { + None | Some("") => return Ok(0), + Some(regex) => regex, + }; + + let pattern = compile_and_cache_regex( + regex, + flags_scalar, + &mut regex_cache, + )?; + count_matches(value, &pattern, start) + }) + .collect::>>()?, + ))) + } + (false, false, false) => { + if values.len() != regex_array.len() { + return exec_err!( + "regex_array must be the same length as values array; got {} and {}", + values.len(), + regex_array.len() + ); + } - let flags_iter: Box>> = if is_flags_scalar { - let flags = flags.as_string::(); - let flags = flags - .is_valid(0) - .then(|| flags.value(0)) - .map(|x| { - if x.contains('g') { - return arrow_err!(ArrowError::ComputeError( - "regexp_count() does not support global flag".to_string(), - )); - } - Ok(x) - }) - .transpose()?; + let start_array = start_array.unwrap(); + if values.len() != start_array.len() { + return exec_err!( + "start_array must be the same length as values array; got {} and {}", + values.len(), + start_array.len() + ); + } - Box::new(repeat(flags)) - } else { - Box::new(flags.as_string::().iter()) - }; + let flags_array = flags_array.unwrap(); + if values.len() != flags_array.len() { + return exec_err!( + "flags_array must be the same length as values array; got {} and {}", + values.len(), + flags_array.len() + ); + } - regex_iter_count(array.iter(), regex_iter, start_iter, flags_iter) + let mut regex_cache = HashMap::new(); + Ok(Arc::new(Int64Array::from_iter_values( + izip!( + values.iter(), + regex_array.iter(), + start_array.iter(), + flags_array.iter() + ) + .map(|(value, regex, start, flags)| { + let regex = match regex { + None | Some("") => return Ok(0), + Some(regex) => regex, + }; + + let pattern = + compile_and_cache_regex(regex, flags, &mut regex_cache)?; + count_matches(value, &pattern, start) + }) + .collect::>>()?, + ))) + } + } } -fn regex_iter_count<'a>( - array: impl Iterator>, - regex: impl Iterator>, - start: impl Iterator, - flags: impl Iterator>, -) -> Result { - Ok(Int64Array::from( - izip!(array, regex, start, flags) - .map(|(array, regex, start, flags)| { - if array.is_none() || regex.is_none() { - return Ok(0); - } +fn compile_and_cache_regex( + regex: &str, + flags: Option<&str>, + regex_cache: &mut HashMap, +) -> Result { + match regex_cache.entry(regex.to_string()) { + Entry::Vacant(entry) => { + let compiled = compile_regex(regex, flags)?; + entry.insert(compiled.clone()); + Ok(compiled) + } + Entry::Occupied(entry) => Ok(entry.get().to_owned()), + } +} - let regex = regex.unwrap(); - if regex.is_empty() { - return Ok(0); - } +fn compile_regex(regex: &str, flags: Option<&str>) -> Result { + let pattern = match flags { + None | Some("") => regex.to_string(), + Some(flags) => { + if flags.contains("g") { + return Err(ArrowError::ComputeError( + "regexp_count() does not support global flag".to_string(), + ) + .into()); + } + format!("(?{}){}", flags, regex) + } + }; - if start <= 0 { - return Err(ArrowError::ComputeError( - "regexp_count() requires start to be 1 based".to_string(), - )); - } + Regex::new(&pattern).map_err(|_| { + ArrowError::ComputeError(format!( + "Regular expression did not compile: {}", + pattern + )) + .into() + }) +} - let array = array.unwrap(); - let start = start as usize; - if start > array.len() { - return Ok(0); - } +fn count_matches( + value: Option<&str>, + pattern: &Regex, + start: Option, +) -> Result { + let value = match value { + None | Some("") => return Ok(0), + Some(value) => value, + }; - let pattern = if let Some(Some(flags)) = - flags.map(|x| if x.is_empty() { None } else { Some(x) }) - { - if flags.contains('g') { - return Err(ArrowError::ComputeError( - "regexp_count() does not support global flag".to_string(), - )); - } - - format!("(?{flags}){regex}") - } else { - regex.to_string() - }; - - let Ok(re) = Regex::new(pattern.as_str()) else { - return Err(ArrowError::ComputeError(format!( - "Regular expression did not compile: {}", - pattern - ))); - }; - - Ok(re - .find_iter(&array.chars().skip(start - 1).collect::()) - .count() as i64) - }) - .collect::, ArrowError>>()?, - )) + if let Some(start) = start { + if start < 1 { + return Err(ArrowError::ComputeError( + "regexp_count() requires start to be 1 based".to_string(), + ) + .into()); + } + + let find_slice = value.chars().skip(start as usize - 1).collect::(); + let count = pattern.find_iter(find_slice.as_str()).count(); + Ok(count as i64) + } else { + let count = pattern.find_iter(value).count(); + Ok(count as i64) + } } #[cfg(test)] mod tests { - use crate::regex::regexpcount::regexp_count; - use arrow::array::{ArrayRef, GenericStringArray, Int64Array, OffsetSizeTrait}; - use std::sync::Arc; + use super::*; + use arrow::array::GenericStringArray; #[test] fn test_regexp_count() { @@ -399,11 +530,7 @@ mod tests { let expected = Int64Array::from(vec![0, 1, 2, 1, 3]); - let re = regexp_count::(&[ - Arc::new(values) as ArrayRef, - Arc::new(regex) as ArrayRef, - ]) - .unwrap(); + let re = regexp_count::(&[Arc::new(values), Arc::new(regex)]).unwrap(); assert_eq!(re.as_ref(), &expected); } @@ -420,12 +547,8 @@ mod tests { let expected = Int64Array::from(vec![0, 1, 1, 0, 2]); - let re = regexp_count::(&[ - Arc::new(values) as ArrayRef, - Arc::new(regex) as ArrayRef, - Arc::new(start) as ArrayRef, - ]) - .unwrap(); + let re = regexp_count::(&[Arc::new(values), Arc::new(regex), Arc::new(start)]) + .unwrap(); assert_eq!(re.as_ref(), &expected); } @@ -444,10 +567,10 @@ mod tests { let expected = Int64Array::from(vec![0, 1, 2, 2, 3]); let re = regexp_count::(&[ - Arc::new(values) as ArrayRef, - Arc::new(regex) as ArrayRef, - Arc::new(start) as ArrayRef, - Arc::new(flags) as ArrayRef, + Arc::new(values), + Arc::new(regex), + Arc::new(start), + Arc::new(flags), ]) .unwrap(); assert_eq!(re.as_ref(), &expected); @@ -465,11 +588,7 @@ mod tests { let expected = Int64Array::from(vec![0, 1, 2, 2, 2]); - let re = regexp_count::(&[ - Arc::new(values) as ArrayRef, - Arc::new(regex) as ArrayRef, - ]) - .unwrap(); + let re = regexp_count::(&[Arc::new(values), Arc::new(regex)]).unwrap(); assert_eq!(re.as_ref(), &expected); } @@ -486,12 +605,8 @@ mod tests { let expected = Int64Array::from(vec![0, 0, 1, 1, 0]); - let re = regexp_count::(&[ - Arc::new(values) as ArrayRef, - Arc::new(regex) as ArrayRef, - Arc::new(start) as ArrayRef, - ]) - .unwrap(); + let re = regexp_count::(&[Arc::new(values), Arc::new(regex), Arc::new(start)]) + .unwrap(); assert_eq!(re.as_ref(), &expected); } @@ -510,10 +625,10 @@ mod tests { let expected = Int64Array::from(vec![0, 1, 2, 2, 3]); let re = regexp_count::(&[ - Arc::new(values) as ArrayRef, - Arc::new(regex) as ArrayRef, - Arc::new(start) as ArrayRef, - Arc::new(flags) as ArrayRef, + Arc::new(values), + Arc::new(regex), + Arc::new(start), + Arc::new(flags), ]) .unwrap(); assert_eq!(re.as_ref(), &expected); @@ -534,10 +649,10 @@ mod tests { let expected = Int64Array::from(vec![0, 0, 0, 2, 1]); let re = regexp_count::(&[ - Arc::new(values) as ArrayRef, - Arc::new(regex) as ArrayRef, - Arc::new(start) as ArrayRef, - Arc::new(flags) as ArrayRef, + Arc::new(values), + Arc::new(regex), + Arc::new(start), + Arc::new(flags), ]) .unwrap(); assert_eq!(re.as_ref(), &expected); @@ -558,10 +673,10 @@ mod tests { let expected = Int64Array::from(vec![0, 1, 1, 1, 1]); let re = regexp_count::(&[ - Arc::new(values) as ArrayRef, - Arc::new(regex) as ArrayRef, - Arc::new(start) as ArrayRef, - Arc::new(flags) as ArrayRef, + Arc::new(values), + Arc::new(regex), + Arc::new(start), + Arc::new(flags), ]) .unwrap(); assert_eq!(re.as_ref(), &expected); diff --git a/datafusion/proto-common/Cargo.toml b/datafusion/proto-common/Cargo.toml index 9b2f15a9a710..e5d65827cdec 100644 --- a/datafusion/proto-common/Cargo.toml +++ b/datafusion/proto-common/Cargo.toml @@ -26,7 +26,7 @@ homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } authors = { workspace = true } -rust-version = "1.80" +rust-version = "1.76" # Exclude proto files so crates.io consumers don't need protoc exclude = ["*.proto"] diff --git a/datafusion/proto-common/gen/Cargo.toml b/datafusion/proto-common/gen/Cargo.toml index bb03208b2b70..54ec0e44694b 100644 --- a/datafusion/proto-common/gen/Cargo.toml +++ b/datafusion/proto-common/gen/Cargo.toml @@ -20,7 +20,7 @@ name = "gen-common" description = "Code generation for proto" version = "0.1.0" edition = { workspace = true } -rust-version = "1.80" +rust-version = "1.76" authors = { workspace = true } homepage = { workspace = true } repository = { workspace = true } diff --git a/datafusion/proto/Cargo.toml b/datafusion/proto/Cargo.toml index 4203bd7a28c0..95d9e6700a50 100644 --- a/datafusion/proto/Cargo.toml +++ b/datafusion/proto/Cargo.toml @@ -27,7 +27,7 @@ repository = { workspace = true } license = { workspace = true } authors = { workspace = true } # Specify MSRV here as `cargo msrv` doesn't support workspace version -rust-version = "1.80" +rust-version = "1.76" # Exclude proto files so crates.io consumers don't need protoc exclude = ["*.proto"] diff --git a/datafusion/proto/gen/Cargo.toml b/datafusion/proto/gen/Cargo.toml index e69282540cb2..401c51c94563 100644 --- a/datafusion/proto/gen/Cargo.toml +++ b/datafusion/proto/gen/Cargo.toml @@ -20,7 +20,7 @@ name = "gen" description = "Code generation for proto" version = "0.1.0" edition = { workspace = true } -rust-version = "1.80" +rust-version = "1.76" authors = { workspace = true } homepage = { workspace = true } repository = { workspace = true } diff --git a/datafusion/substrait/Cargo.toml b/datafusion/substrait/Cargo.toml index 0647263225c4..9e7ef9632ad3 100644 --- a/datafusion/substrait/Cargo.toml +++ b/datafusion/substrait/Cargo.toml @@ -26,7 +26,7 @@ repository = { workspace = true } license = { workspace = true } authors = { workspace = true } # Specify MSRV here as `cargo msrv` doesn't support workspace version -rust-version = "1.80" +rust-version = "1.76" [lints] workspace = true