Skip to content

Commit

Permalink
benches: add lower benches (#12152)
Browse files Browse the repository at this point in the history
  • Loading branch information
tshauck authored Aug 26, 2024
1 parent ed12f11 commit 55a1459
Showing 1 changed file with 90 additions and 2 deletions.
92 changes: 90 additions & 2 deletions datafusion/functions/benches/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@

extern crate criterion;

use arrow::array::{ArrayRef, StringArray};
use arrow::util::bench_util::create_string_array_with_len;
use arrow::array::{ArrayRef, StringArray, StringViewBuilder};
use arrow::util::bench_util::{
create_string_array_with_len, create_string_view_array_with_len,
};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use datafusion_expr::ColumnarValue;
use datafusion_functions::string;
Expand Down Expand Up @@ -65,6 +67,58 @@ fn create_args3(size: usize) -> Vec<ColumnarValue> {
vec![ColumnarValue::Array(array)]
}

/// Create an array of args containing StringViews, where all the values in the
/// StringViews are ASCII.
/// * `size` - the length of the StringViews, and
/// * `str_len` - the length of the strings within the array.
/// * `null_density` - the density of null values in the array.
/// * `mixed` - whether the array is mixed between inlined and referenced strings.
fn create_args4(
size: usize,
str_len: usize,
null_density: f32,
mixed: bool,
) -> Vec<ColumnarValue> {
let array = Arc::new(create_string_view_array_with_len(
size,
null_density,
str_len,
mixed,
));

vec![ColumnarValue::Array(array)]
}

/// Build the argument list for `lower` from a single StringView array in which
/// some of the values are non-ASCII.
///
/// * `size` - the number of elements in the array.
/// * `non_ascii_density` - the density of non-ASCII values among the non-null
///   elements.
/// * `null_density` - the density of null values in the array.
fn create_args5(
    size: usize,
    non_ascii_density: f32,
    null_density: f32,
) -> Vec<ColumnarValue> {
    // The only two values ever stored in the array.
    const NON_ASCII_VALUE: &str = "农历新年农历新年农历新年农历新年农历新年";
    const ASCII_VALUE: &str = "DATAFUSIONDATAFUSIONDATAFUSION";

    let mut builder = StringViewBuilder::with_capacity(size);
    for _ in 0..size {
        // First draw decides whether this slot is null.
        if rand::random::<f32>() < null_density {
            builder.append_null();
        } else {
            // Second draw (non-null slots only) picks non-ASCII vs. ASCII.
            let value = if rand::random::<f32>() < non_ascii_density {
                NON_ASCII_VALUE
            } else {
                ASCII_VALUE
            };
            builder.append_value(value);
        }
    }

    let string_views = Arc::new(builder.finish()) as ArrayRef;
    vec![ColumnarValue::Array(string_views)]
}

fn criterion_benchmark(c: &mut Criterion) {
let lower = string::lower();
for size in [1024, 4096, 8192] {
Expand All @@ -85,6 +139,40 @@ fn criterion_benchmark(c: &mut Criterion) {
|b| b.iter(|| black_box(lower.invoke(&args))),
);
}

// Benchmark `lower` on StringView inputs for every combination of null
// density, inlined/referenced mix, string length and array size.
let sizes = [4096, 8192];
let str_lens = [10, 64, 128];
let mixes = [true, false];
let null_densities = [0.0f32, 0.1f32];
// Density of non-ASCII values among the non-null elements built by `create_args5`.
let non_ascii_density = 0.1f32;

for &null_density in &null_densities {
    for &mixed in &mixes {
        for &str_len in &str_lens {
            for &size in &sizes {
                // All-ASCII StringViews. Each configuration is registered exactly
                // once; registering it again with the same arguments and id would
                // only repeat the measurement.
                let args = create_args4(size, str_len, null_density, mixed);
                c.bench_function(
                    &format!("lower_all_values_are_ascii_string_views: size: {}, str_len: {}, null_density: {}, mixed: {}",
                        size, str_len, null_density, mixed),
                    |b| b.iter(|| black_box(lower.invoke(&args))),
                );

                // StringViews with some non-ASCII values.
                // NOTE(review): `create_args5` takes neither `str_len` nor `mixed`,
                // so those two fields in the label do not describe the data being
                // measured; the same configuration is re-measured for each of their
                // combinations. Labels are kept as-is to preserve existing ids.
                let args = create_args5(size, non_ascii_density, null_density);
                c.bench_function(
                    &format!("lower_some_values_are_nonascii_string_views: size: {}, str_len: {}, non_ascii_density: {}, null_density: {}, mixed: {}",
                        size, str_len, non_ascii_density, null_density, mixed),
                    |b| b.iter(|| black_box(lower.invoke(&args))),
                );
            }
        }
    }
}
}

criterion_group!(benches, criterion_benchmark);
Expand Down

0 comments on commit 55a1459

Please sign in to comment.