Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/develop' into aduffy/more-filterfn
Browse files Browse the repository at this point in the history
  • Loading branch information
a10y committed Sep 13, 2024
2 parents dc15df5 + 750b9c8 commit d5ecb80
Show file tree
Hide file tree
Showing 17 changed files with 410 additions and 151 deletions.
3 changes: 2 additions & 1 deletion fuzz/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ impl<'a> Arbitrary<'a> for FuzzArrayAction {
let array = Array::arbitrary(u)?;
let mut current_array = array.clone();
let mut actions = Vec::new();
for _ in 0..u.int_in_range(1..=4)? {
let action_count = u.int_in_range(1..=4)?;
while actions.len() < action_count {
actions.push(match u.int_in_range(0..=3)? {
0 => {
if actions
Expand Down
19 changes: 18 additions & 1 deletion fuzz/src/search_sorted.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@ use std::cmp::Ordering;
use vortex::accessor::ArrayAccessor;
use vortex::compute::{IndexOrd, Len, SearchResult, SearchSorted, SearchSortedSide};
use vortex::validity::ArrayValidity;
use vortex::variants::StructArrayTrait;
use vortex::{Array, ArrayDType, IntoArray, IntoArrayVariant};
use vortex_buffer::{Buffer, BufferString};
use vortex_dtype::{match_each_native_ptype, DType};
use vortex_scalar::Scalar;
use vortex_scalar::{Scalar, StructScalar};

struct SearchNullableSlice<T>(Vec<Option<T>>);

Expand Down Expand Up @@ -92,6 +93,22 @@ pub fn search_sorted_canonical_array(
};
SearchNullableSlice(opt_values).search_sorted(&Some(to_find), side)
}
DType::Struct(..) => {
let strct = array.clone().into_struct().unwrap();
let scalar_fields: StructScalar = scalar.try_into().unwrap();
let mut res: Option<SearchResult> = None;
for (c, i) in strct.children().zip(0..strct.names().len()) {
res = Some(search_sorted_canonical_array(
&c,
&scalar_fields.field_by_idx(i).unwrap(),
side,
));
if let SearchResult::NotFound(u) = res.unwrap() {
return SearchResult::NotFound(u);
}
}
res.unwrap()
}
_ => unreachable!("Not a canonical array"),
}
}
28 changes: 27 additions & 1 deletion fuzz/src/slice.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use vortex::accessor::ArrayAccessor;
use vortex::array::{BoolArray, PrimitiveArray, VarBinArray};
use vortex::array::{BoolArray, PrimitiveArray, StructArray, VarBinArray};
use vortex::validity::{ArrayValidity, Validity};
use vortex::variants::StructArrayTrait;
use vortex::{Array, ArrayDType, IntoArray, IntoArrayVariant};
use vortex_dtype::{match_each_native_ptype, DType};

Expand Down Expand Up @@ -52,6 +53,31 @@ pub fn slice_canonical_array(array: &Array, start: usize, stop: usize) -> Array
VarBinArray::from_iter(Vec::from(&values[start..stop]), array.dtype().clone())
.into_array()
}
DType::Struct(..) => {
let struct_array = array.clone().into_struct().unwrap();
let sliced_children = struct_array
.children()
.map(|c| slice_canonical_array(&c, start, stop))
.collect::<Vec<_>>();
let vec_validity = struct_array
.logical_validity()
.into_array()
.into_bool()
.unwrap()
.boolean_buffer()
.iter()
.collect::<Vec<_>>();

let len = sliced_children[0].len();
StructArray::try_new(
struct_array.names().clone(),
sliced_children,
len,
Validity::from(Vec::from(&vec_validity[start..stop])),
)
.unwrap()
.into_array()
}
_ => unreachable!("Array::arbitrary will not generate other dtypes"),
}
}
13 changes: 13 additions & 0 deletions fuzz/src/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@ use std::cmp::Ordering;

use vortex::accessor::ArrayAccessor;
use vortex::array::{BoolArray, PrimitiveArray, VarBinArray};
use vortex::compute::unary::scalar_at;
use vortex::validity::ArrayValidity;
use vortex::{Array, ArrayDType, IntoArray, IntoArrayVariant};
use vortex_dtype::{match_each_float_ptype, match_each_integer_ptype, DType};

use crate::take::take_canonical_array;

pub fn sort_canonical_array(array: &Array) -> Array {
match array.dtype() {
DType::Bool(_) => {
Expand Down Expand Up @@ -84,6 +87,16 @@ pub fn sort_canonical_array(array: &Array) -> Array {
sort_opt_slice(&mut opt_values);
VarBinArray::from_iter(opt_values, array.dtype().clone()).into_array()
}
DType::Struct(..) => {
let mut sort_indices = (0..array.len()).collect::<Vec<_>>();
sort_indices.sort_by(|a, b| {
scalar_at(array, *a)
.unwrap()
.partial_cmp(&scalar_at(array, *b).unwrap())
.unwrap()
});
take_canonical_array(array, &sort_indices)
}
_ => unreachable!("Not a canonical array"),
}
}
Expand Down
28 changes: 27 additions & 1 deletion fuzz/src/take.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use vortex::accessor::ArrayAccessor;
use vortex::array::{BoolArray, PrimitiveArray, VarBinArray};
use vortex::array::{BoolArray, PrimitiveArray, StructArray, VarBinArray};
use vortex::validity::{ArrayValidity, Validity};
use vortex::variants::StructArrayTrait;
use vortex::{Array, ArrayDType, IntoArray, IntoArrayVariant};
use vortex_dtype::{match_each_native_ptype, DType};

Expand Down Expand Up @@ -55,6 +56,31 @@ pub fn take_canonical_array(array: &Array, indices: &[usize]) -> Array {
)
.into_array()
}
DType::Struct(..) => {
let struct_array = array.clone().into_struct().unwrap();
let taken_children = struct_array
.children()
.map(|c| take_canonical_array(&c, indices))
.collect::<Vec<_>>();
let vec_validity = struct_array
.logical_validity()
.into_array()
.into_bool()
.unwrap()
.boolean_buffer()
.iter()
.collect::<Vec<_>>();

let len = taken_children[0].len();
StructArray::try_new(
struct_array.names().clone(),
taken_children,
len,
Validity::from(indices.iter().map(|i| vec_validity[*i]).collect::<Vec<_>>()),
)
.unwrap()
.into_array()
}
_ => unreachable!("Array::arbitrary will not generate other dtypes"),
}
}
202 changes: 143 additions & 59 deletions vortex-array/src/array/arbitrary.rs
Original file line number Diff line number Diff line change
@@ -1,84 +1,168 @@
use arbitrary::{Arbitrary, Result, Unstructured};
use vortex_dtype::{DType, NativePType, Nullability};
use vortex_dtype::{DType, FieldName, NativePType, Nullability};
use vortex_error::VortexUnwrap;

use super::{BoolArray, PrimitiveArray};
use super::{BoolArray, ChunkedArray, PrimitiveArray, StructArray};
use crate::array::{VarBinArray, VarBinViewArray};
use crate::validity::Validity;
use crate::{Array, IntoArray as _};
use crate::{Array, ArrayDType, IntoArray as _};

// Lets a fuzzer build an `Array` directly from its unstructured input bytes.
impl<'a> Arbitrary<'a> for Array {
// Draws one boolean from `u` to choose the nullability, then delegates to `random_array`
// with no fixed length (`None`).
fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
// NOTE(review): this one-argument call does not match the three-argument call at the end of
// this function; presumably it is the pre-change line kept by the diff view — confirm.
random_array(u)
let nullability = if u.arbitrary()? {
Nullability::Nullable
} else {
Nullability::NonNullable
};
random_array(u, None, nullability)
}
}

fn random_array(u: &mut Unstructured) -> Result<Array> {
match u.int_in_range(0..=12)? {
0 => random_primitive::<u8>(u),
1 => random_primitive::<u16>(u),
2 => random_primitive::<u32>(u),
3 => random_primitive::<u64>(u),
4 => random_primitive::<i8>(u),
5 => random_primitive::<i16>(u),
6 => random_primitive::<i32>(u),
7 => random_primitive::<i64>(u),
8 => random_primitive::<f32>(u),
9 => random_primitive::<f64>(u),
10 => random_bool(u),
11 => random_string(u),
12 => random_bytes(u),
_ => unreachable!(),
/// Splits `total` into `parts` chunk lengths that sum to exactly `total`.
///
/// The remainder of the division is spread over the trailing parts, so the lengths differ by
/// at most one; some parts are zero when `total < parts`. `parts` must be non-zero.
fn split_len_into_chunks(total: usize, parts: usize) -> Vec<usize> {
    let base = total / parts;
    let remainder = total % parts;
    (0..parts)
        .map(|i| if i < parts - remainder { base } else { base + 1 })
        .collect()
}

/// Generates a random array of one randomly chosen kind, possibly split into several chunks.
///
/// * `u` - source of fuzzer-provided entropy.
/// * `len` - when `Some`, the total length of the returned array; when `None`, every chunk
///   picks its own length.
/// * `nullability` - forwarded to the per-kind generators.
///
/// Returns the single chunk directly when only one was generated, otherwise a `ChunkedArray`
/// over all chunks. Errors from `u` are propagated.
fn random_array(
    u: &mut Unstructured,
    len: Option<usize>,
    nullability: Nullability,
) -> Result<Array> {
    let array_kind = u.int_in_range(0..=13)?;
    let is_struct = array_kind == 13;

    // Field names are only consumed by the struct generator, so only spend entropy on them
    // when a struct is actually being built.
    let names: Vec<FieldName> = if is_struct {
        let name_count = u.int_in_range(1..=10)?;
        arbitrary_vec_of_len(u, Some(name_count))?
    } else {
        Vec::new()
    };

    // Each `random_struct` call picks the kinds of its fields independently, so two struct
    // chunks would generally not share a dtype and could not be combined into one
    // `ChunkedArray`. Structs are therefore generated as a single chunk; their fields can
    // still be chunked because they are produced by recursive calls to this function.
    let chunk_count: usize = if is_struct {
        1
    } else {
        u.int_in_range(1..=11)?
    };

    // A requested length applies to the whole array, so it is divided between the chunks
    // rather than handed to each of them. `random_struct` relies on this: it passes
    // `Some(defined_len)` for its fields and declares that same length for the struct.
    let chunk_lens = len.map(|total| split_len_into_chunks(total, chunk_count));

    let mut chunks = (0..chunk_count)
        .map(|i| {
            let chunk_len = chunk_lens.as_ref().map(|lens| lens[i]);
            match array_kind {
                0 => random_primitive::<u8>(u, chunk_len, nullability),
                1 => random_primitive::<u16>(u, chunk_len, nullability),
                2 => random_primitive::<u32>(u, chunk_len, nullability),
                3 => random_primitive::<u64>(u, chunk_len, nullability),
                4 => random_primitive::<i8>(u, chunk_len, nullability),
                5 => random_primitive::<i16>(u, chunk_len, nullability),
                6 => random_primitive::<i32>(u, chunk_len, nullability),
                7 => random_primitive::<i64>(u, chunk_len, nullability),
                8 => random_primitive::<f32>(u, chunk_len, nullability),
                9 => random_primitive::<f64>(u, chunk_len, nullability),
                10 => random_bool(u, chunk_len, nullability),
                11 => random_string(u, chunk_len, nullability),
                12 => random_bytes(u, chunk_len, nullability),
                13 => random_struct(u, chunk_len, names.clone(), nullability),
                _ => unreachable!(),
            }
        })
        .collect::<Result<Vec<_>>>()?;

    if chunks.len() == 1 {
        Ok(chunks.remove(0))
    } else {
        let dtype = chunks[0].dtype().clone();
        Ok(ChunkedArray::try_new(chunks, dtype)
            .vortex_unwrap()
            .into_array())
    }
}

fn random_string(u: &mut Unstructured) -> Result<Array> {
let v = Vec::<Option<String>>::arbitrary(u)?;
let arr = match u.int_in_range(0..=1)? {
0 => VarBinArray::from_iter(v, DType::Utf8(Nullability::Nullable)).into_array(),
1 => VarBinViewArray::from_iter_nullable_str(v).into_array(),
_ => unreachable!(),
};

Ok(arr)
/// Generates a random UTF-8 string array.
///
/// The values are drawn first (exactly `len` of them when `len` is `Some`), then one more
/// integer from `u` selects between a `VarBinArray` and a `VarBinViewArray` encoding.
/// Nullable arrays are built from `Option<String>` values, non-nullable ones from `String`.
fn random_string(
    u: &mut Unstructured,
    len: Option<usize>,
    nullability: Nullability,
) -> Result<Array> {
    let array = match nullability {
        Nullability::NonNullable => {
            let values: Vec<String> = arbitrary_vec_of_len(u, len)?;
            match u.int_in_range(0..=1)? {
                0 => VarBinArray::from_vec(values, DType::Utf8(Nullability::NonNullable))
                    .into_array(),
                1 => VarBinViewArray::from_iter_str(values).into_array(),
                _ => unreachable!(),
            }
        }
        Nullability::Nullable => {
            let values: Vec<Option<String>> = arbitrary_vec_of_len(u, len)?;
            match u.int_in_range(0..=1)? {
                0 => VarBinArray::from_iter(values, DType::Utf8(Nullability::Nullable))
                    .into_array(),
                1 => VarBinViewArray::from_iter_nullable_str(values).into_array(),
                _ => unreachable!(),
            }
        }
    };
    Ok(array)
}

fn random_bytes(u: &mut Unstructured) -> Result<Array> {
let v = Vec::<Option<Vec<u8>>>::arbitrary(u)?;
let arr = match u.int_in_range(0..=1)? {
0 => VarBinArray::from_iter(v, DType::Binary(Nullability::Nullable)).into_array(),
1 => VarBinViewArray::from_iter_nullable_bin(v).into_array(),
_ => unreachable!(),
};

Ok(arr)
/// Generates a random binary (byte-string) array.
///
/// The values are drawn first (exactly `len` of them when `len` is `Some`), then one more
/// integer from `u` selects between a `VarBinArray` and a `VarBinViewArray` encoding.
/// Nullable arrays are built from `Option<Vec<u8>>` values, non-nullable ones from `Vec<u8>`.
fn random_bytes(
    u: &mut Unstructured,
    len: Option<usize>,
    nullability: Nullability,
) -> Result<Array> {
    let array = match nullability {
        Nullability::NonNullable => {
            let values: Vec<Vec<u8>> = arbitrary_vec_of_len(u, len)?;
            match u.int_in_range(0..=1)? {
                0 => VarBinArray::from_vec(values, DType::Binary(Nullability::NonNullable))
                    .into_array(),
                1 => VarBinViewArray::from_iter_bin(values).into_array(),
                _ => unreachable!(),
            }
        }
        Nullability::Nullable => {
            let values: Vec<Option<Vec<u8>>> = arbitrary_vec_of_len(u, len)?;
            match u.int_in_range(0..=1)? {
                0 => VarBinArray::from_iter(values, DType::Binary(Nullability::Nullable))
                    .into_array(),
                1 => VarBinViewArray::from_iter_nullable_bin(values).into_array(),
                _ => unreachable!(),
            }
        }
    };
    Ok(array)
}

fn random_primitive<'a, T: Arbitrary<'a> + NativePType>(u: &mut Unstructured<'a>) -> Result<Array> {
let v = Vec::<T>::arbitrary(u)?;
let validity = random_validity(u, v.len())?;
/// Generates a random primitive array with element type `T`.
///
/// Draws the values (exactly `len` of them when `len` is `Some`) and then a validity of the
/// same length that is consistent with `nullability`.
fn random_primitive<'a, T: Arbitrary<'a> + NativePType>(
    u: &mut Unstructured<'a>,
    len: Option<usize>,
    nullability: Nullability,
) -> Result<Array> {
    let values: Vec<T> = arbitrary_vec_of_len(u, len)?;
    let value_count = values.len();
    let validity = random_validity(u, value_count, nullability)?;
    let primitive = PrimitiveArray::from_vec(values, validity);
    Ok(primitive.into_array())
}

fn random_bool(u: &mut Unstructured) -> Result<Array> {
let v = Vec::<bool>::arbitrary(u)?;
let validity = random_validity(u, v.len())?;

/// Generates a random boolean array.
///
/// Draws the values (exactly `len` of them when `len` is `Some`) and then a validity of the
/// same length that is consistent with `nullability`.
fn random_bool(
    u: &mut Unstructured,
    len: Option<usize>,
    nullability: Nullability,
) -> Result<Array> {
    let values: Vec<bool> = arbitrary_vec_of_len(u, len)?;
    let value_count = values.len();
    let validity = random_validity(u, value_count, nullability)?;
    let bools = BoolArray::from_vec(values, validity);
    Ok(bools.into_array())
}

fn random_validity(u: &mut Unstructured, len: usize) -> Result<Validity> {
let v = match u.int_in_range(0..=3)? {
0 => Validity::AllValid,
1 => Validity::AllInvalid,
2 => Validity::NonNullable,
3 => {
let bools = (0..len)
.map(|_| bool::arbitrary(u))
.collect::<Result<Vec<_>>>()?;
Validity::from(bools)
}
_ => unreachable!(),
};
/// Picks a validity for an array of `len` elements.
///
/// Non-nullable arrays always get `Validity::NonNullable` and consume nothing from `u`.
/// Nullable arrays get, chosen by one integer from `u`, either all-valid, all-invalid, or a
/// per-element mask of `len` arbitrary booleans.
fn random_validity(u: &mut Unstructured, len: usize, nullability: Nullability) -> Result<Validity> {
    if matches!(nullability, Nullability::NonNullable) {
        return Ok(Validity::NonNullable);
    }

    let validity = match u.int_in_range(0..=2)? {
        0 => Validity::AllValid,
        1 => Validity::AllInvalid,
        2 => {
            let mask: Vec<bool> = arbitrary_vec_of_len(u, Some(len))?;
            Validity::from(mask)
        }
        _ => unreachable!(),
    };
    Ok(validity)
}

/// Generates a random struct array with one field per entry in `names`.
///
/// The first field is generated with the caller's `len`; when `len` is `None`, that field's
/// length becomes the length of the struct. Every remaining field is requested with that
/// length. The struct's own validity is drawn last, after all fields.
fn random_struct(
    u: &mut Unstructured,
    len: Option<usize>,
    names: Vec<FieldName>,
    nullability: Nullability,
) -> Result<Array> {
    let first_field = random_array(u, len, nullability)?;
    let row_count = len.unwrap_or(first_field.len());

    // The first field is always present; one more is added for each additional name.
    let mut fields = vec![first_field];
    for _ in 1..names.len() {
        fields.push(random_array(u, Some(row_count), nullability)?);
    }

    let validity = random_validity(u, row_count, nullability)?;
    let struct_array =
        StructArray::try_new(names.into(), fields, row_count, validity).vortex_unwrap();
    Ok(struct_array.into_array())
}

Ok(v)
/// Draws a vector of arbitrary `T` values from `u`.
///
/// With `Some(n)` exactly `n` values are drawn one by one, stopping at the first error.
/// With `None` the length is left to `Vec`'s own `Arbitrary` implementation.
fn arbitrary_vec_of_len<'a, T: Arbitrary<'a>>(
    u: &mut Unstructured<'a>,
    len: Option<usize>,
) -> Result<Vec<T>> {
    match len {
        Some(count) => (0..count).map(|_| T::arbitrary(u)).collect(),
        None => Vec::<T>::arbitrary(u),
    }
}
Loading

0 comments on commit d5ecb80

Please sign in to comment.