diff --git a/vortex-array/Cargo.toml b/vortex-array/Cargo.toml
index aefe26b02..68f54146f 100644
--- a/vortex-array/Cargo.toml
+++ b/vortex-array/Cargo.toml
@@ -52,7 +52,7 @@ vortex-datetime-dtype = { workspace = true }
 vortex-dtype = { workspace = true, features = ["flatbuffers", "serde"] }
 vortex-error = { workspace = true, features = ["flatbuffers", "flexbuffers"] }
 vortex-flatbuffers = { workspace = true, features = ["array"] }
-vortex-scalar = { workspace = true, features = ["flatbuffers", "serde"] }
+vortex-scalar = { workspace = true, features = ["flatbuffers", "serde", "arbitrary"] }
 
 [features]
 arbitrary = ["dep:arbitrary", "vortex-dtype/arbitrary"]
diff --git a/vortex-array/src/array/arbitrary.rs b/vortex-array/src/array/arbitrary.rs
index 9012c5ac7..854ea8817 100644
--- a/vortex-array/src/array/arbitrary.rs
+++ b/vortex-array/src/array/arbitrary.rs
@@ -1,14 +1,20 @@
 use std::iter;
+use std::sync::Arc;
 
 use arbitrary::{Arbitrary, Result, Unstructured};
 use arrow_buffer::BooleanBuffer;
+use builders::ListBuilder;
+use num_traits::{AsPrimitive, PrimInt};
 use vortex_dtype::{DType, NativePType, Nullability, PType};
 use vortex_error::{VortexExpect, VortexUnwrap};
+use vortex_scalar::arbitrary::random_scalar;
+use vortex_scalar::Scalar;
 
 use super::{BoolArray, ChunkedArray, NullArray, PrimitiveArray, StructArray};
 use crate::array::{VarBinArray, VarBinViewArray};
+use crate::builders::ArrayBuilder;
 use crate::validity::Validity;
-use crate::{ArrayDType, ArrayData, IntoArrayData as _, IntoArrayVariant};
+use crate::{builders, ArrayDType, ArrayData, IntoArrayData as _, IntoArrayVariant};
 
 impl<'a> Arbitrary<'a> for ArrayData {
     fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
@@ -81,10 +87,7 @@ fn random_array(u: &mut Unstructured, dtype: &DType, len: Option<usize>) -> Resu
                     .vortex_unwrap()
                     .into_array())
                 }
-                // TOOD(joe): add arbitrary list
-                DType::List(..) => {
-                    todo!("List arrays are not implemented")
-                }
+                DType::List(ldt, n) => random_list(u, ldt, n),
                 DType::Extension(..) => {
                     todo!("Extension arrays are not implemented")
                 }
@@ -102,6 +105,59 @@ fn random_array(u: &mut Unstructured, dtype: &DType, len: Option<usize>) -> Resu
     }
 }
 
+/// Generates a random list array whose offsets use a randomly chosen integer width.
+///
+/// `ldt` is the element dtype and `n` the nullability handed to the list builder.
+// NOTE(review): the offset type arguments were missing from this patch text and are
+// reconstructed here; 8-bit widths are presumably too small for the up to 20 * 20 = 400
+// elements generated below. Confirm the exact types and order against the original commit.
+fn random_list(u: &mut Unstructured, ldt: &Arc<DType>, n: &Nullability) -> Result<ArrayData> {
+    match u.int_in_range(0..=5)? {
+        0 => random_list_offset::<i16>(u, ldt, n),
+        1 => random_list_offset::<i32>(u, ldt, n),
+        2 => random_list_offset::<i64>(u, ldt, n),
+        3 => random_list_offset::<u16>(u, ldt, n),
+        4 => random_list_offset::<u32>(u, ldt, n),
+        5 => random_list_offset::<u64>(u, ldt, n),
+        _ => unreachable!("int_in_range returns a value in the above range"),
+    }
+}
+
+/// Builds a random list array with offsets of type `O`.
+///
+/// Produces between 0 and 20 entries; each non-null entry holds between 0 and 20 random
+/// scalars of dtype `ldt`. Null entries are only appended when `n` is
+/// [`Nullability::Nullable`].
+fn random_list_offset<O>(
+    u: &mut Unstructured,
+    ldt: &Arc<DType>,
+    n: &Nullability,
+) -> Result<ArrayData>
+where
+    O: PrimInt + NativePType,
+    Scalar: From<O>,
+    usize: AsPrimitive<O>,
+{
+    let list_len = u.int_in_range(0..=20)?;
+    let mut builder = ListBuilder::<O>::with_capacity(ldt.clone(), *n, 1);
+    for _ in 0..list_len {
+        // Only flip the null coin for nullable lists: a non-nullable list must never get a
+        // null entry, and a nullable one should get some.
+        if matches!(n, Nullability::Nullable) && u.arbitrary::<bool>()? {
+            builder.append_null();
+        } else {
+            let elem_len = u.int_in_range(0..=20)?;
+            let elem = (0..elem_len)
+                .map(|_| random_scalar(u, ldt))
+                .collect::<Result<Vec<_>>>()?;
+            builder
+                .append_value(Scalar::list(ldt.clone(), elem, *n).as_list())
+                .vortex_expect("can append value");
+        }
+    }
+    Ok(builder.finish().vortex_expect("builder cannot error"))
+}
+
 fn split_number_into_parts(n: usize, parts: usize) -> Vec<usize> {
     let reminder = n % parts;
     let division = (n - reminder) / parts;
diff --git a/vortex-array/src/builders/mod.rs b/vortex-array/src/builders/mod.rs
index fe6172c4b..7b48fbd72 100644
--- a/vortex-array/src/builders/mod.rs
+++ b/vortex-array/src/builders/mod.rs
@@ -12,6 +12,7 @@ use std::any::Any;
 pub use binary::*;
 pub use bool::*;
 pub use extension::*;
+pub use list::*;
 pub use null::*;
 pub use primitive::*;
 pub use utf8::*;
@@ -22,7 +23,6 @@ use vortex_scalar::{
     Utf8Scalar,
 };
 
-use crate::builders::list::ListBuilder;
 use crate::builders::struct_::StructBuilder;
 use crate::ArrayData;
 