From 07312be60b32421eac49e0af9dbd1ebdbd3b593e Mon Sep 17 00:00:00 2001 From: Xin Li Date: Thu, 29 Aug 2024 12:11:39 +0800 Subject: [PATCH] Refactoring regexp_count --- datafusion/functions/benches/regx.rs | 30 +- datafusion/functions/src/regex/regexpcount.rs | 442 +++++++++++------- 2 files changed, 288 insertions(+), 184 deletions(-) diff --git a/datafusion/functions/benches/regx.rs b/datafusion/functions/benches/regx.rs index dd902400d3a4..62fe4f53038d 100644 --- a/datafusion/functions/benches/regx.rs +++ b/datafusion/functions/benches/regx.rs @@ -19,8 +19,10 @@ extern crate criterion; use arrow::array::builder::StringBuilder; use arrow::array::{ArrayRef, Int64Array, StringArray}; +use arrow::compute::cast; +use arrow::datatypes::DataType; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use datafusion_functions::regex::regexpcount::regexp_count; +use datafusion_functions::regex::regexpcount::regexp_count_func; use datafusion_functions::regex::regexplike::regexp_like; use datafusion_functions::regex::regexpmatch::regexp_match; use datafusion_functions::regex::regexpreplace::regexp_replace; @@ -85,7 +87,7 @@ fn flags(rng: &mut ThreadRng) -> StringArray { } fn criterion_benchmark(c: &mut Criterion) { - c.bench_function("regexp_count_1000", |b| { + c.bench_function("regexp_count_1000 string", |b| { let mut rng = rand::thread_rng(); let data = Arc::new(data(&mut rng)) as ArrayRef; let regex = Arc::new(regex(&mut rng)) as ArrayRef; @@ -94,13 +96,33 @@ fn criterion_benchmark(c: &mut Criterion) { b.iter(|| { black_box( - regexp_count::(&[ + regexp_count_func(&[ Arc::clone(&data), Arc::clone(®ex), Arc::clone(&start), Arc::clone(&flags), ]) - .expect("regexp_count should work on valid values"), + .expect("regexp_count should work on utf8"), + ) + }) + }); + + c.bench_function("regexp_count_1000 utf8view", |b| { + let mut rng = rand::thread_rng(); + let data = cast(&data(&mut rng), &DataType::Utf8View).unwrap(); + let regex = cast(®ex(&mut rng), &DataType::Utf8View).unwrap(); + let start = Arc::new(start(&mut rng)) as ArrayRef; + let flags = cast(&flags(&mut rng), &DataType::Utf8View).unwrap(); + + b.iter(|| { + black_box( + regexp_count_func(&[ + Arc::clone(&data), + Arc::clone(®ex), + Arc::clone(&start), + Arc::clone(&flags), + ]) + .expect("regexp_count should work on utf8view"), ) }) }); diff --git a/datafusion/functions/src/regex/regexpcount.rs b/datafusion/functions/src/regex/regexpcount.rs index 2b7805c40915..511481d5e892 100644 --- a/datafusion/functions/src/regex/regexpcount.rs +++ b/datafusion/functions/src/regex/regexpcount.rs @@ -15,13 +15,12 @@ // specific language governing permissions and limitations // under the License. -use arrow::array::{Array, ArrayRef, Int64Array, OffsetSizeTrait}; -use arrow::datatypes::DataType; +use arrow::array::{Array, ArrayRef, AsArray, Datum, Int64Array}; +use arrow::datatypes::{DataType, Int64Type}; use arrow::datatypes::{ DataType::Int64, DataType::LargeUtf8, DataType::Utf8, DataType::Utf8View, }; use arrow::error::ArrowError; -use datafusion_common::cast::{as_generic_string_array, as_int64_array}; use datafusion_common::{exec_err, internal_err, Result, ScalarValue}; use datafusion_expr::{ ColumnarValue, ScalarUDFImpl, Signature, TypeSignature::Exact, @@ -33,6 +32,8 @@ use std::collections::hash_map::Entry; use std::collections::HashMap; use std::sync::Arc; +use crate::string::common::StringArrayType; + #[derive(Debug)] pub struct RegexpCountFunc { signature: Signature, @@ -106,16 +107,32 @@ impl ScalarUDFImpl for RegexpCountFunc { } } -fn regexp_count_func(args: &[ArrayRef]) -> Result { - match args[0].data_type() { - Utf8 => regexp_count::(args), - LargeUtf8 => regexp_count::(args), +pub fn regexp_count_func(args: &[ArrayRef]) -> Result { + let args_len = args.len(); + if !(2..=4).contains(&args_len) { + return exec_err!("regexp_count was called with {args_len} arguments. It requires at least 2 and at most 4."); + } + + let values = &args[0]; + match values.data_type() { + Utf8 | LargeUtf8 | Utf8View => (), other => { - internal_err!("Unsupported data type {other:?} for function regexp_count") + return internal_err!( + "Unsupported data type {other:?} for function regexp_count" + ); } } + + regexp_count( + values, + &args[1], + if args_len > 2 { Some(&args[2]) } else { None }, + if args_len > 3 { Some(&args[3]) } else { None }, + ) + .map_err(|e| e.into()) } +/// `arrow-rs` style implementation of `regexp_count` function. /// This function `regexp_count` is responsible for counting the occurrences of a regular expression pattern /// within a string array. It supports optional start positions and flags for case insensitivity. /// @@ -130,42 +147,122 @@ fn regexp_count_func(args: &[ArrayRef]) -> Result { /// /// # Errors /// Returns an error if the input arrays have mismatched lengths or if the regular expression fails to compile. -pub fn regexp_count(args: &[ArrayRef]) -> Result { - let args_len = args.len(); - if !(2..=4).contains(&args_len) { - return exec_err!("regexp_count was called with {args_len} arguments. It requires at least 2 and at most 4."); +pub fn regexp_count( + values: &dyn Array, + regex_array: &dyn Datum, + start_array: Option<&dyn Datum>, + flags_array: Option<&dyn Datum>, +) -> Result { + let (regex_array, is_regex_scalar) = regex_array.get(); + let (start_array, is_start_scalar) = start_array.map_or((None, true), |start| { + let (start, is_start_scalar) = start.get(); + (Some(start), is_start_scalar) + }); + let (flags_array, is_flags_scalar) = flags_array.map_or((None, true), |flags| { + let (flags, is_flags_scalar) = flags.get(); + (Some(flags), is_flags_scalar) + }); + + match (values.data_type(), regex_array.data_type(), flags_array) { + (Utf8, Utf8, None) => regexp_count_inner( + values.as_string::(), + regex_array.as_string::(), + is_regex_scalar, + start_array.map(|start| start.as_primitive::()), + is_start_scalar, + None, + is_flags_scalar, + ), + (Utf8, Utf8, Some(flags_array)) if *flags_array.data_type() == Utf8 => regexp_count_inner( + values.as_string::(), + regex_array.as_string::(), + is_regex_scalar, + start_array.map(|start| start.as_primitive::()), + is_start_scalar, + Some(flags_array.as_string::()), + is_flags_scalar, + ), + (LargeUtf8, LargeUtf8, None) => regexp_count_inner( + values.as_string::(), + regex_array.as_string::(), + is_regex_scalar, + start_array.map(|start| start.as_primitive::()), + is_start_scalar, + None, + is_flags_scalar, + ), + (LargeUtf8, LargeUtf8, Some(flags_array)) if *flags_array.data_type() == LargeUtf8 => regexp_count_inner( + values.as_string::(), + regex_array.as_string::(), + is_regex_scalar, + start_array.map(|start| start.as_primitive::()), + is_start_scalar, + Some(flags_array.as_string::()), + is_flags_scalar, + ), + (Utf8View, Utf8View, None) => regexp_count_inner( + values.as_string_view(), + regex_array.as_string_view(), + is_regex_scalar, + start_array.map(|start| start.as_primitive::()), + is_start_scalar, + None, + is_flags_scalar, + ), + (Utf8View, Utf8View, Some(flags_array)) if *flags_array.data_type() == Utf8View => regexp_count_inner( + values.as_string_view(), + regex_array.as_string_view(), + is_regex_scalar, + start_array.map(|start| start.as_primitive::()), + is_start_scalar, + Some(flags_array.as_string_view()), + is_flags_scalar, + ), + _ => Err(ArrowError::ComputeError( + "regexp_count() expected the input arrays to be of type Utf8, LargeUtf8, or Utf8View and the data types of the values, regex_array, and flags_array to match".to_string(), + )), } +} - let values = as_generic_string_array::(&args[0])?; - let regex_array = as_generic_string_array::(&args[1])?; - - let (regex_scalar, is_regex_scalar) = if regex_array.len() == 1 { +pub fn regexp_count_inner<'a, S>( + values: S, + regex_array: S, + is_regex_scalar: bool, + start_array: Option<&Int64Array>, + is_start_scalar: bool, + flags_array: Option, + is_flags_scalar: bool, +) -> Result +where + S: StringArrayType<'a>, +{ + let (regex_scalar, is_regex_scalar) = if is_regex_scalar || regex_array.len() == 1 { (Some(regex_array.value(0)), true) } else { (None, false) }; - let (start_array, start_scalar, is_start_scalar) = if args.len() > 2 { - let start = as_int64_array(&args[2])?; - if start.len() == 1 { - (None, Some(start.value(0)), true) + let (start_array, start_scalar, is_start_scalar) = + if let Some(start_array) = start_array { + if is_start_scalar || start_array.len() == 1 { + (None, Some(start_array.value(0)), true) + } else { + (Some(start_array), None, false) + } } else { - (Some(start), None, false) - } - } else { - (None, Some(1), true) - }; - - let (flags_array, flags_scalar, is_flags_scalar) = if args.len() > 3 { - let flags = as_generic_string_array::(&args[3])?; - if flags.len() == 1 { - (None, Some(flags.value(0)), true) + (None, Some(1), true) + }; + + let (flags_array, flags_scalar, is_flags_scalar) = + if let Some(flags_array) = flags_array { + if is_flags_scalar || flags_array.len() == 1 { + (None, Some(flags_array.value(0)), true) + } else { + (Some(flags_array), None, false) + } } else { - (Some(flags), None, false) - } - } else { - (None, None, true) - }; + (None, None, true) + }; match (is_regex_scalar, is_start_scalar, is_flags_scalar) { (true, true, true) => { @@ -182,7 +279,7 @@ pub fn regexp_count(args: &[ArrayRef]) -> Result { values .iter() .map(|value| count_matches(value, &pattern, start_scalar)) - .collect::>>()?, + .collect::, ArrowError>>()?, ))) } (true, true, false) => { @@ -195,11 +292,11 @@ pub fn regexp_count(args: &[ArrayRef]) -> Result { let flags_array = flags_array.unwrap(); if values.len() != flags_array.len() { - return exec_err!( + return Err(ArrowError::ComputeError(format!( "flags_array must be the same length as values array; got {} and {}", values.len(), flags_array.len() - ); + ))); } let mut regex_cache = HashMap::new(); @@ -212,7 +309,7 @@ pub fn regexp_count(args: &[ArrayRef]) -> Result { compile_and_cache_regex(regex, flags, &mut regex_cache)?; count_matches(value, &pattern, start_scalar) }) - .collect::>>()?, + .collect::, ArrowError>>()?, ))) } (true, false, true) => { @@ -232,7 +329,7 @@ pub fn regexp_count(args: &[ArrayRef]) -> Result { .iter() .zip(start_array.iter()) .map(|(value, start)| count_matches(value, &pattern, start)) - .collect::>>()?, + .collect::, ArrowError>>()?, ))) } (true, false, false) => { @@ -245,11 +342,11 @@ pub fn regexp_count(args: &[ArrayRef]) -> Result { let flags_array = flags_array.unwrap(); if values.len() != flags_array.len() { - return exec_err!( + return Err(ArrowError::ComputeError(format!( "flags_array must be the same length as values array; got {} and {}", values.len(), flags_array.len() - ); + ))); } let mut regex_cache = HashMap::new(); @@ -265,16 +362,16 @@ pub fn regexp_count(args: &[ArrayRef]) -> Result { count_matches(value, &pattern, start) }) - .collect::>>()?, + .collect::, ArrowError>>()?, ))) } (false, true, true) => { if values.len() != regex_array.len() { - return exec_err!( + return Err(ArrowError::ComputeError(format!( "regex_array must be the same length as values array; got {} and {}", values.len(), regex_array.len() - ); + ))); } let mut regex_cache = HashMap::new(); @@ -295,25 +392,25 @@ pub fn regexp_count(args: &[ArrayRef]) -> Result { )?; count_matches(value, &pattern, start_scalar) }) - .collect::>>()?, + .collect::, ArrowError>>()?, ))) } (false, true, false) => { if values.len() != regex_array.len() { - return exec_err!( + return Err(ArrowError::ComputeError(format!( "regex_array must be the same length as values array; got {} and {}", values.len(), regex_array.len() - ); + ))); } let flags_array = flags_array.unwrap(); if values.len() != flags_array.len() { - return exec_err!( + return Err(ArrowError::ComputeError(format!( "flags_array must be the same length as values array; got {} and {}", values.len(), flags_array.len() - ); + ))); } let mut regex_cache = HashMap::new(); @@ -330,25 +427,25 @@ pub fn regexp_count(args: &[ArrayRef]) -> Result { count_matches(value, &pattern, start_scalar) }) - .collect::>>()?, + .collect::, ArrowError>>()?, ))) } (false, false, true) => { if values.len() != regex_array.len() { - return exec_err!( + return Err(ArrowError::ComputeError(format!( "regex_array must be the same length as values array; got {} and {}", values.len(), regex_array.len() - ); + ))); } let start_array = start_array.unwrap(); if values.len() != start_array.len() { - return exec_err!( + return Err(ArrowError::ComputeError(format!( "start_array must be the same length as values array; got {} and {}", values.len(), start_array.len() - ); + ))); } let mut regex_cache = HashMap::new(); @@ -367,34 +464,34 @@ pub fn regexp_count(args: &[ArrayRef]) -> Result { )?; count_matches(value, &pattern, start) }) - .collect::>>()?, + .collect::, ArrowError>>()?, ))) } (false, false, false) => { if values.len() != regex_array.len() { - return exec_err!( + return Err(ArrowError::ComputeError(format!( "regex_array must be the same length as values array; got {} and {}", values.len(), regex_array.len() - ); + ))); } let start_array = start_array.unwrap(); if values.len() != start_array.len() { - return exec_err!( + return Err(ArrowError::ComputeError(format!( "start_array must be the same length as values array; got {} and {}", values.len(), start_array.len() - ); + ))); } let flags_array = flags_array.unwrap(); if values.len() != flags_array.len() { - return exec_err!( + return Err(ArrowError::ComputeError(format!( "flags_array must be the same length as values array; got {} and {}", values.len(), flags_array.len() - ); + ))); } let mut regex_cache = HashMap::new(); @@ -415,7 +512,7 @@ pub fn regexp_count(args: &[ArrayRef]) -> Result { compile_and_cache_regex(regex, flags, &mut regex_cache)?; count_matches(value, &pattern, start) }) - .collect::>>()?, + .collect::, ArrowError>>()?, ))) } } @@ -425,7 +522,7 @@ fn compile_and_cache_regex( regex: &str, flags: Option<&str>, regex_cache: &mut HashMap, -) -> Result { +) -> Result { match regex_cache.entry(regex.to_string()) { Entry::Vacant(entry) => { let compiled = compile_regex(regex, flags)?; @@ -436,15 +533,14 @@ fn compile_and_cache_regex( } } -fn compile_regex(regex: &str, flags: Option<&str>) -> Result { +fn compile_regex(regex: &str, flags: Option<&str>) -> Result { let pattern = match flags { None | Some("") => regex.to_string(), Some(flags) => { if flags.contains("g") { return Err(ArrowError::ComputeError( "regexp_count() does not support global flag".to_string(), - ) - .into()); + )); } format!("(?{}){}", flags, regex) } @@ -455,7 +551,6 @@ fn compile_regex(regex: &str, flags: Option<&str>) -> Result { "Regular expression did not compile: {}", pattern )) - .into() }) } @@ -463,7 +558,7 @@ fn count_matches( value: Option<&str>, pattern: &Regex, start: Option, -) -> Result { +) -> Result { let value = match value { None | Some("") => return Ok(0), Some(value) => value, @@ -473,8 +568,7 @@ fn count_matches( if start < 1 { return Err(ArrowError::ComputeError( "regexp_count() requires start to be 1 based".to_string(), - ) - .into()); + )); } let find_slice = value.chars().skip(start as usize - 1).collect::(); @@ -489,84 +583,87 @@ fn count_matches( #[cfg(test)] mod tests { use super::*; - use arrow::array::GenericStringArray; + use arrow::array::{GenericStringArray, StringViewArray}; #[test] fn test_regexp_count() { - test_case_sensitive_regexp_count_scalar::(); - test_case_sensitive_regexp_count_scalar::(); - - test_case_sensitive_regexp_count_scalar_start::(); - test_case_sensitive_regexp_count_scalar_start::(); - - test_case_insensitive_regexp_count_scalar_flags::(); - test_case_insensitive_regexp_count_scalar_flags::(); - - test_case_sensitive_regexp_count_array::(); - test_case_sensitive_regexp_count_array::(); - - test_case_sensitive_regexp_count_array_start::(); - test_case_sensitive_regexp_count_array_start::(); - - test_case_insensitive_regexp_count_array_flags::(); - test_case_insensitive_regexp_count_array_flags::(); - - test_case_sensitive_regexp_count_start_scalar_complex::(); - test_case_sensitive_regexp_count_start_scalar_complex::(); - - test_case_sensitive_regexp_count_array_complex::(); - test_case_sensitive_regexp_count_array_complex::(); + test_case_sensitive_regexp_count_scalar::>(); + test_case_sensitive_regexp_count_scalar::>(); + test_case_sensitive_regexp_count_scalar::(); + + test_case_sensitive_regexp_count_scalar_start::>(); + test_case_sensitive_regexp_count_scalar_start::>(); + test_case_sensitive_regexp_count_scalar_start::(); + + test_case_insensitive_regexp_count_scalar_flags::>(); + test_case_insensitive_regexp_count_scalar_flags::>(); + test_case_insensitive_regexp_count_scalar_flags::(); + + test_case_sensitive_regexp_count_array::>(); + test_case_sensitive_regexp_count_array::>(); + test_case_sensitive_regexp_count_array::(); + + test_case_sensitive_regexp_count_array_start::>(); + test_case_sensitive_regexp_count_array_start::>(); + test_case_sensitive_regexp_count_array_start::(); + + test_case_insensitive_regexp_count_array_flags::>(); + test_case_insensitive_regexp_count_array_flags::>(); + test_case_insensitive_regexp_count_array_flags::(); + + test_case_sensitive_regexp_count_start_scalar_complex::>( + ); + test_case_sensitive_regexp_count_start_scalar_complex::>( + ); + test_case_sensitive_regexp_count_start_scalar_complex::(); + + test_case_sensitive_regexp_count_array_complex::>(); + test_case_sensitive_regexp_count_array_complex::>(); + test_case_sensitive_regexp_count_array_complex::(); } - fn test_case_sensitive_regexp_count_scalar() { - let values = GenericStringArray::::from(vec![ - "", - "aabca", - "abcabc", - "abcAbcab", - "abcabcabc", - ]); - let regex = GenericStringArray::::from(vec!["abc"; 1]); + fn test_case_sensitive_regexp_count_scalar() + where + A: From> + Array + 'static, + { + let values = A::from(vec!["", "aabca", "abcabc", "abcAbcab", "abcabcabc"]); + let regex = A::from(vec!["abc"; 1]); + let start = Int64Array::from(vec![2]); - let expected = Int64Array::from(vec![0, 1, 2, 1, 3]); + let expected = Int64Array::from(vec![0, 1, 1, 0, 2]); - let re = regexp_count::(&[Arc::new(values), Arc::new(regex)]).unwrap(); + let re = regexp_count_func(&[Arc::new(values), Arc::new(regex), Arc::new(start)]) + .unwrap(); assert_eq!(re.as_ref(), &expected); } - fn test_case_sensitive_regexp_count_scalar_start() { - let values = GenericStringArray::::from(vec![ - "", - "aabca", - "abcabc", - "abcAbcab", - "abcabcabc", - ]); - let regex = GenericStringArray::::from(vec!["abc"; 1]); + fn test_case_sensitive_regexp_count_scalar_start() + where + A: From> + Array + 'static, + { + let values = A::from(vec!["", "aabca", "abcabc", "abcAbcab", "abcabcabc"]); + let regex = A::from(vec!["abc"; 1]); let start = Int64Array::from(vec![2]); let expected = Int64Array::from(vec![0, 1, 1, 0, 2]); - let re = regexp_count::(&[Arc::new(values), Arc::new(regex), Arc::new(start)]) + let re = regexp_count_func(&[Arc::new(values), Arc::new(regex), Arc::new(start)]) .unwrap(); assert_eq!(re.as_ref(), &expected); } - fn test_case_insensitive_regexp_count_scalar_flags() { - let values = GenericStringArray::::from(vec![ - "", - "aabca", - "abcabc", - "abcAbcab", - "abcabcabc", - ]); - let regex = GenericStringArray::::from(vec!["abc"; 1]); + fn test_case_insensitive_regexp_count_scalar_flags() + where + A: From> + Array + 'static, + { + let values = A::from(vec!["", "aabca", "abcabc", "abcAbcab", "abcabcabc"]); + let regex = A::from(vec!["abc"; 1]); let start = Int64Array::from(vec![1]); - let flags = GenericStringArray::::from(vec!["i"]); + let flags = A::from(vec!["i"]); let expected = Int64Array::from(vec![0, 1, 2, 2, 3]); - let re = regexp_count::(&[ + let re = regexp_count_func(&[ Arc::new(values), Arc::new(regex), Arc::new(start), @@ -576,55 +673,46 @@ mod tests { assert_eq!(re.as_ref(), &expected); } - fn test_case_sensitive_regexp_count_array() { - let values = GenericStringArray::::from(vec![ - "", - "aabca", - "abcabc", - "abcAbcab", - "abcabcAbc", - ]); - let regex = GenericStringArray::::from(vec!["", "abc", "a", "bc", "ab"]); + fn test_case_sensitive_regexp_count_array() + where + A: From> + Array + 'static, + { + let values = A::from(vec!["", "aabca", "abcabc", "abcAbcab", "abcabcAbc"]); + let regex = A::from(vec!["", "abc", "a", "bc", "ab"]); let expected = Int64Array::from(vec![0, 1, 2, 2, 2]); - let re = regexp_count::(&[Arc::new(values), Arc::new(regex)]).unwrap(); + let re = regexp_count_func(&[Arc::new(values), Arc::new(regex)]).unwrap(); assert_eq!(re.as_ref(), &expected); } - fn test_case_sensitive_regexp_count_array_start() { - let values = GenericStringArray::::from(vec![ - "", - "aAbca", - "abcabc", - "abcAbcab", - "abcabcAbc", - ]); - let regex = GenericStringArray::::from(vec!["", "abc", "a", "bc", "ab"]); + fn test_case_sensitive_regexp_count_array_start() + where + A: From> + Array + 'static, + { + let values = A::from(vec!["", "aAbca", "abcabc", "abcAbcab", "abcabcAbc"]); + let regex = A::from(vec!["", "abc", "a", "bc", "ab"]); let start = Int64Array::from(vec![1, 2, 3, 4, 5]); let expected = Int64Array::from(vec![0, 0, 1, 1, 0]); - let re = regexp_count::(&[Arc::new(values), Arc::new(regex), Arc::new(start)]) + let re = regexp_count_func(&[Arc::new(values), Arc::new(regex), Arc::new(start)]) .unwrap(); assert_eq!(re.as_ref(), &expected); } - fn test_case_insensitive_regexp_count_array_flags() { - let values = GenericStringArray::::from(vec![ - "", - "aAbca", - "abcabc", - "abcAbcab", - "abcabcAbc", - ]); - let regex = GenericStringArray::::from(vec!["", "abc", "a", "bc", "ab"]); + fn test_case_insensitive_regexp_count_array_flags() + where + A: From> + Array + 'static, + { + let values = A::from(vec!["", "aAbca", "abcabc", "abcAbcab", "abcabcAbc"]); + let regex = A::from(vec!["", "abc", "a", "bc", "ab"]); let start = Int64Array::from(vec![1]); - let flags = GenericStringArray::::from(vec!["", "i", "", "", "i"]); + let flags = A::from(vec!["", "i", "", "", "i"]); let expected = Int64Array::from(vec![0, 1, 2, 2, 3]); - let re = regexp_count::(&[ + let re = regexp_count_func(&[ Arc::new(values), Arc::new(regex), Arc::new(start), @@ -634,21 +722,18 @@ mod tests { assert_eq!(re.as_ref(), &expected); } - fn test_case_sensitive_regexp_count_start_scalar_complex() { - let values = GenericStringArray::::from(vec![ - "", - "aAbca", - "abcabc", - "abcAbcabc", - "abcabcAbc", - ]); - let regex = GenericStringArray::::from(vec!["", "abc", "a", "bc", "ab"]); + fn test_case_sensitive_regexp_count_start_scalar_complex() + where + A: From> + Array + 'static, + { + let values = A::from(vec!["", "aAbca", "abcabc", "abcAbcabc", "abcabcAbc"]); + let regex = A::from(vec!["", "abc", "a", "bc", "ab"]); let start = Int64Array::from(vec![5]); - let flags = GenericStringArray::::from(vec!["", "i", "", "", "i"]); + let flags = A::from(vec!["", "i", "", "", "i"]); let expected = Int64Array::from(vec![0, 0, 0, 2, 1]); - let re = regexp_count::(&[ + let re = regexp_count_func(&[ Arc::new(values), Arc::new(regex), Arc::new(start), @@ -658,21 +743,18 @@ mod tests { assert_eq!(re.as_ref(), &expected); } - fn test_case_sensitive_regexp_count_array_complex() { - let values = GenericStringArray::::from(vec![ - "", - "aAbca", - "abcabc", - "abcAbcab", - "abcabcAbc", - ]); - let regex = GenericStringArray::::from(vec!["", "abc", "a", "bc", "ab"]); + fn test_case_sensitive_regexp_count_array_complex() + where + A: From> + Array + 'static, + { + let values = A::from(vec!["", "aAbca", "abcabc", "abcAbcab", "abcabcAbc"]); + let regex = A::from(vec!["", "abc", "a", "bc", "ab"]); let start = Int64Array::from(vec![1, 2, 3, 4, 5]); - let flags = GenericStringArray::::from(vec!["", "i", "", "", "i"]); + let flags = A::from(vec!["", "i", "", "", "i"]); let expected = Int64Array::from(vec![0, 1, 1, 1, 1]); - let re = regexp_count::(&[ + let re = regexp_count_func(&[ Arc::new(values), Arc::new(regex), Arc::new(start),