Skip to content

Commit

Permalink
Refactoring regexp_count
Browse files (browse the repository at this point in the history)
  • Loading branch information
xinlifoobar committed Aug 29, 2024
1 parent 0333ec4 commit 07312be
Show file tree
Hide file tree
Showing 2 changed files with 288 additions and 184 deletions.
30 changes: 26 additions & 4 deletions datafusion/functions/benches/regx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@ extern crate criterion;

use arrow::array::builder::StringBuilder;
use arrow::array::{ArrayRef, Int64Array, StringArray};
use arrow::compute::cast;
use arrow::datatypes::DataType;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use datafusion_functions::regex::regexpcount::regexp_count;
use datafusion_functions::regex::regexpcount::regexp_count_func;
use datafusion_functions::regex::regexplike::regexp_like;
use datafusion_functions::regex::regexpmatch::regexp_match;
use datafusion_functions::regex::regexpreplace::regexp_replace;
Expand Down Expand Up @@ -85,7 +87,7 @@ fn flags(rng: &mut ThreadRng) -> StringArray {
}

fn criterion_benchmark(c: &mut Criterion) {
c.bench_function("regexp_count_1000", |b| {
c.bench_function("regexp_count_1000 string", |b| {
let mut rng = rand::thread_rng();
let data = Arc::new(data(&mut rng)) as ArrayRef;
let regex = Arc::new(regex(&mut rng)) as ArrayRef;
Expand All @@ -94,13 +96,33 @@ fn criterion_benchmark(c: &mut Criterion) {

b.iter(|| {
black_box(
regexp_count::<i32>(&[
regexp_count_func(&[
Arc::clone(&data),
Arc::clone(&regex),
Arc::clone(&start),
Arc::clone(&flags),
])
.expect("regexp_count should work on valid values"),
.expect("regexp_count should work on utf8"),
)
})
});

c.bench_function("regexp_count_1000 utf8view", |b| {
let mut rng = rand::thread_rng();
let data = cast(&data(&mut rng), &DataType::Utf8View).unwrap();
let regex = cast(&regex(&mut rng), &DataType::Utf8View).unwrap();
let start = Arc::new(start(&mut rng)) as ArrayRef;
let flags = cast(&flags(&mut rng), &DataType::Utf8View).unwrap();

b.iter(|| {
black_box(
regexp_count_func(&[
Arc::clone(&data),
Arc::clone(&regex),
Arc::clone(&start),
Arc::clone(&flags),
])
.expect("regexp_count should work on utf8view"),
)
})
});
Expand Down
Loading

0 comments on commit 07312be

Please sign in to comment.