From 7ad8fd4ec2712b972c42ce1708015da3757c7c0e Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Thu, 19 Dec 2024 14:42:28 +0000 Subject: [PATCH] replace into_canonical().into_X() with with into_X() --- bench-vortex/benches/compress.rs | 9 ++------ encodings/alp/src/alp_rd/array.rs | 16 +++++--------- encodings/dict/src/compress.rs | 6 ++--- .../fastlanes/src/bitpacking/compute/take.rs | 2 +- encodings/fsst/src/array.rs | 22 ++++++++----------- encodings/fsst/src/canonical.rs | 5 +---- encodings/fsst/tests/fsst_tests.rs | 10 ++------- encodings/runend-bool/src/array.rs | 7 +++--- vortex-array/benches/take_patches.rs | 6 +---- vortex-array/src/array/chunked/canonical.rs | 2 +- .../src/array/chunked/compute/filter.rs | 6 ++--- vortex-array/src/array/sparse/canonical.rs | 10 +++------ vortex-array/src/array/varbin/canonical.rs | 4 ++-- vortex-array/src/array/varbinview/accessor.rs | 4 ++-- vortex-array/src/arrow/record_batch.rs | 2 +- vortex-array/src/canonical.rs | 3 +-- vortex-array/src/compute/binary_numeric.rs | 10 +-------- 17 files changed, 41 insertions(+), 83 deletions(-) diff --git a/bench-vortex/benches/compress.rs b/bench-vortex/benches/compress.rs index 91bd8c0f2d..95ddbd4c4d 100644 --- a/bench-vortex/benches/compress.rs +++ b/bench-vortex/benches/compress.rs @@ -34,7 +34,7 @@ use vortex::error::VortexResult; use vortex::file::{LayoutContext, LayoutDeserializer, VortexFileWriter, VortexReadBuilder}; use vortex::sampling_compressor::compressors::fsst::FSSTCompressor; use vortex::sampling_compressor::{SamplingCompressor, ALL_ENCODINGS_CONTEXT}; -use vortex::{ArrayDType, ArrayData, IntoArrayData, IntoCanonical}; +use vortex::{ArrayDType, ArrayData, IntoArrayData, IntoArrayVariant, IntoCanonical}; use crate::tokio_runtime::TOKIO_RUNTIME; @@ -412,12 +412,7 @@ fn tpc_h_l_comment(c: &mut Criterion) { "TPC-H l_comment chunked", ); - let comments_canonical = comments - .into_canonical() - .unwrap() - .into_struct() - .unwrap() - .into_array(); + let comments_canonical = comments.into_struct().unwrap().into_array(); let dtype = comments_canonical.dtype().clone(); let comments_canonical_chunked = ChunkedArray::try_new(vec![comments_canonical], dtype).unwrap(); diff --git a/encodings/alp/src/alp_rd/array.rs b/encodings/alp/src/alp_rd/array.rs index bd6adfa3d4..c3480c2474 100644 --- a/encodings/alp/src/alp_rd/array.rs +++ b/encodings/alp/src/alp_rd/array.rs @@ -8,7 +8,8 @@ use vortex_array::stats::{StatisticsVTable, StatsSet}; use vortex_array::validity::{ArrayValidity, LogicalValidity, ValidityVTable}; use vortex_array::visitor::{ArrayVisitor, VisitorVTable}; use vortex_array::{ - impl_encoding, ArrayDType, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoCanonical, + impl_encoding, ArrayDType, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayVariant, + IntoCanonical, }; use vortex_dtype::{DType, Nullability, PType}; use vortex_error::{vortex_bail, VortexExpect, VortexResult}; @@ -192,8 +193,8 @@ impl ALPRDArray { impl IntoCanonical for ALPRDArray { fn into_canonical(self) -> VortexResult { - let left_parts = self.left_parts().into_canonical()?.into_primitive()?; - let right_parts = self.right_parts().into_canonical()?.into_primitive()?; + let left_parts = self.left_parts().into_primitive()?; + let right_parts = self.right_parts().into_primitive()?; // Decode the left_parts using our builtin dictionary. let left_parts_dict = &self.metadata().dict[0..self.metadata().dict_len as usize]; @@ -258,7 +259,7 @@ impl ArrayTrait for ALPRDArray {} mod test { use rstest::rstest; use vortex_array::array::PrimitiveArray; - use vortex_array::{IntoArrayData, IntoCanonical}; + use vortex_array::{IntoArrayData, IntoArrayVariant, IntoCanonical}; use crate::{alp_rd, ALPRDFloat}; @@ -284,12 +285,7 @@ mod test { let rd_array = encoder.encode(&real_array); - let decoded = rd_array - .into_array() - .into_canonical() - .unwrap() - .into_primitive() - .unwrap(); + let decoded = rd_array.into_array().into_primitive().unwrap(); let maybe_null_reals: Vec = reals.into_iter().map(|v| v.unwrap_or_default()).collect(); assert_eq!(decoded.maybe_null_slice::(), &maybe_null_reals); diff --git a/encodings/dict/src/compress.rs b/encodings/dict/src/compress.rs index 144f21f41f..0c41c71ac5 100644 --- a/encodings/dict/src/compress.rs +++ b/encodings/dict/src/compress.rs @@ -10,7 +10,7 @@ use vortex_array::array::{ }; use vortex_array::validity::Validity; use vortex_array::variants::PrimitiveArrayTrait; -use vortex_array::{ArrayDType, IntoArrayData, IntoCanonical}; +use vortex_array::{ArrayDType, IntoArrayData, IntoArrayVariant, IntoCanonical}; use vortex_dtype::{match_each_native_ptype, DType, NativePType, ToBytes}; use vortex_error::{VortexExpect as _, VortexUnwrap}; use vortex_scalar::Scalar; @@ -98,10 +98,8 @@ pub fn dict_encode_varbinview(array: &VarBinViewArray) -> (PrimitiveArray, VarBi ( codes, values - .into_canonical() - .vortex_expect("VarBin to canonical") .into_varbinview() - .vortex_expect("VarBinView"), + .vortex_expect("VarBin to canonical to VarBinView"), ) } diff --git a/encodings/fastlanes/src/bitpacking/compute/take.rs b/encodings/fastlanes/src/bitpacking/compute/take.rs index 3e085cd99d..da46c685d4 100644 --- a/encodings/fastlanes/src/bitpacking/compute/take.rs +++ b/encodings/fastlanes/src/bitpacking/compute/take.rs @@ -23,7 +23,7 @@ impl TakeFn for BitPackedEncoding { fn take(&self, array: &BitPackedArray, indices: &ArrayData) -> VortexResult { // If the indices are large enough, it's faster to flatten and take the primitive array. if indices.len() * UNPACK_CHUNK_THRESHOLD > array.len() { - return take(array.clone().into_canonical()?.into_primitive()?, indices); + return take(array.clone().into_primitive()?, indices); } // NOTE: we use the unsigned PType because all values in the BitPackedArray must diff --git a/encodings/fsst/src/array.rs b/encodings/fsst/src/array.rs index 1e0a5e7dc3..ffa779f8ef 100644 --- a/encodings/fsst/src/array.rs +++ b/encodings/fsst/src/array.rs @@ -9,7 +9,9 @@ use vortex_array::stats::{StatisticsVTable, StatsSet}; use vortex_array::validity::{ArrayValidity, LogicalValidity, Validity, ValidityVTable}; use vortex_array::variants::{BinaryArrayTrait, Utf8ArrayTrait, VariantsVTable}; use vortex_array::visitor::{ArrayVisitor, VisitorVTable}; -use vortex_array::{impl_encoding, ArrayDType, ArrayData, ArrayLen, ArrayTrait, IntoCanonical}; +use vortex_array::{ + impl_encoding, ArrayDType, ArrayData, ArrayLen, ArrayTrait, IntoArrayVariant, IntoCanonical, +}; use vortex_dtype::{DType, Nullability, PType}; use vortex_error::{vortex_bail, VortexExpect, VortexResult}; @@ -163,20 +165,14 @@ impl FSSTArray { F: FnOnce(Decompressor) -> VortexResult, { // canonicalize the symbols child array, so we can view it contiguously - let symbols_array = self - .symbols() - .into_canonical() - .map_err(|err| err.with_context("Failed to canonicalize symbols array"))? - .into_primitive() - .map_err(|err| err.with_context("Symbols must be a Primitive Array"))?; + let symbols_array = self.symbols().into_primitive().map_err(|err| { + err.with_context("Failed to canonicalize symbols array into a Primitive Array") + })?; let symbols = symbols_array.maybe_null_slice::(); - let symbol_lengths_array = self - .symbol_lengths() - .into_canonical() - .map_err(|err| err.with_context("Failed to canonicalize symbol_lengths array"))? - .into_primitive() - .map_err(|err| err.with_context("Symbol lengths must be a Primitive Array"))?; + let symbol_lengths_array = self.symbol_lengths().into_primitive().map_err(|err| { + err.with_context("Failed to canonicalize symbol_lengths array into a Primitive Array") + })?; let symbol_lengths = symbol_lengths_array.maybe_null_slice::(); // Transmute the 64-bit symbol values into fsst `Symbol`s. diff --git a/encodings/fsst/src/canonical.rs b/encodings/fsst/src/canonical.rs index ee7e4866be..dfd9d5d855 100644 --- a/encodings/fsst/src/canonical.rs +++ b/encodings/fsst/src/canonical.rs @@ -31,10 +31,7 @@ impl IntoCanonical for FSSTArray { let uncompressed_bytes = decompressor.decompress(compressed_bytes.maybe_null_slice::()); - let uncompressed_lens_array = self - .uncompressed_lengths() - .into_canonical()? - .into_primitive()?; + let uncompressed_lens_array = self.uncompressed_lengths().into_primitive()?; // Directly create the binary views. let views: Vec = match_each_integer_ptype!(uncompressed_lens_array.ptype(), |$P| { diff --git a/encodings/fsst/tests/fsst_tests.rs b/encodings/fsst/tests/fsst_tests.rs index 69a4801c37..cb669a865d 100644 --- a/encodings/fsst/tests/fsst_tests.rs +++ b/encodings/fsst/tests/fsst_tests.rs @@ -5,7 +5,7 @@ use vortex_array::array::PrimitiveArray; use vortex_array::compute::{filter, scalar_at, slice, take, FilterMask}; use vortex_array::encoding::Encoding; use vortex_array::validity::Validity; -use vortex_array::{ArrayData, IntoArrayData, IntoCanonical}; +use vortex_array::{ArrayData, IntoArrayData, IntoArrayVariant, IntoCanonical}; use vortex_dtype::{DType, Nullability}; use vortex_fsst::{fsst_compress, fsst_train_compressor, FSSTEncoding}; @@ -97,13 +97,7 @@ fn test_fsst_array_ops() { ); // test into_canonical - let canonical_array = fsst_array - .clone() - .into_canonical() - .unwrap() - .into_varbinview() - .unwrap() - .into_array(); + let canonical_array = fsst_array.clone().into_varbinview().unwrap().into_array(); assert_eq!(canonical_array.len(), fsst_array.len()); diff --git a/encodings/runend-bool/src/array.rs b/encodings/runend-bool/src/array.rs index 7ff6776c90..099a46fe64 100644 --- a/encodings/runend-bool/src/array.rs +++ b/encodings/runend-bool/src/array.rs @@ -259,7 +259,8 @@ mod test { use vortex_array::stats::ArrayStatistics; use vortex_array::validity::Validity; use vortex_array::{ - ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoCanonical, ToArrayData, + ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoArrayVariant, IntoCanonical, + ToArrayData, }; use vortex_dtype::{DType, Nullability}; @@ -353,8 +354,6 @@ mod test { fn to_bool_vec(arr: &ArrayData) -> Vec { arr.clone() - .into_canonical() - .unwrap() .into_bool() .unwrap() .boolean_buffer() @@ -382,7 +381,7 @@ mod test { let arr = RunEndBoolArray::try_new(ends.into_array(), start, Validity::NonNullable).unwrap(); - let bools = arr.clone().into_canonical().unwrap().into_bool().unwrap(); + let bools = arr.clone().into_bool().unwrap(); for stat in [ Stat::IsConstant, Stat::NullCount, diff --git a/vortex-array/benches/take_patches.rs b/vortex-array/benches/take_patches.rs index bc6cbfcfe8..a0243ce73c 100644 --- a/vortex-array/benches/take_patches.rs +++ b/vortex-array/benches/take_patches.rs @@ -4,7 +4,7 @@ use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; use rand::rngs::StdRng; use rand::{Rng, SeedableRng as _}; use vortex_array::patches::Patches; -use vortex_array::{ArrayData, IntoCanonical as _}; +use vortex_array::{ArrayData, IntoArrayVariant}; fn fixture(len: usize, sparsity: f64, rng: &mut StdRng) -> Patches { // NB: indices are always ordered @@ -53,8 +53,6 @@ fn bench_take(c: &mut Criterion) { patches.take_search( <&ArrayData>::clone(indices) .clone() - .into_canonical() - .unwrap() .into_primitive() .unwrap(), ) @@ -76,8 +74,6 @@ fn bench_take(c: &mut Criterion) { patches.take_map( <&ArrayData>::clone(indices) .clone() - .into_canonical() - .unwrap() .into_primitive() .unwrap(), ) diff --git a/vortex-array/src/array/chunked/canonical.rs b/vortex-array/src/array/chunked/canonical.rs index 4d1d68918d..87f576d375 100644 --- a/vortex-array/src/array/chunked/canonical.rs +++ b/vortex-array/src/array/chunked/canonical.rs @@ -251,7 +251,7 @@ fn pack_views( let canonical_chunk = chunk.clone().into_varbinview()?; for buffer in canonical_chunk.buffers() { - let canonical_buffer = buffer.into_canonical()?.into_primitive()?.into_array(); + let canonical_buffer = buffer.into_primitive()?.into_array(); buffers.push(canonical_buffer); } diff --git a/vortex-array/src/array/chunked/compute/filter.rs b/vortex-array/src/array/chunked/compute/filter.rs index c8f70ff2b2..fa8a5afb93 100644 --- a/vortex-array/src/array/chunked/compute/filter.rs +++ b/vortex-array/src/array/chunked/compute/filter.rs @@ -3,7 +3,7 @@ use vortex_error::{VortexExpect, VortexResult, VortexUnwrap}; use crate::array::{ChunkedArray, ChunkedEncoding, PrimitiveArray}; use crate::compute::{filter, take, FilterFn, FilterMask, SearchSorted, SearchSortedSide}; -use crate::{ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoCanonical}; +use crate::{ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoArrayVariant}; // This is modeled after the constant with the equivalent name in arrow-rs. const FILTER_SLICES_SELECTIVITY_THRESHOLD: f64 = 0.8; @@ -65,7 +65,7 @@ fn filter_slices(array: &ChunkedArray, mask: FilterMask) -> VortexResult(); let mut chunk_filters = vec![ChunkFilter::None; array.nchunks()]; @@ -143,7 +143,7 @@ fn filter_indices(array: &ChunkedArray, mask: FilterMask) -> VortexResult(); for set_index in mask.iter_indices()? { diff --git a/vortex-array/src/array/sparse/canonical.rs b/vortex-array/src/array/sparse/canonical.rs index 504d061b75..064b0079e7 100644 --- a/vortex-array/src/array/sparse/canonical.rs +++ b/vortex-array/src/array/sparse/canonical.rs @@ -92,7 +92,7 @@ mod test { use crate::array::sparse::SparseArray; use crate::array::{BoolArray, PrimitiveArray}; use crate::validity::Validity; - use crate::{ArrayDType, IntoArrayData, IntoCanonical}; + use crate::{ArrayDType, IntoArrayData, IntoArrayVariant}; #[rstest] #[case(Some(true))] @@ -106,7 +106,7 @@ mod test { SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap(); assert_eq!(*sparse_bools.dtype(), DType::Bool(Nullability::Nullable)); - let flat_bools = sparse_bools.into_canonical().unwrap().into_bool().unwrap(); + let flat_bools = sparse_bools.into_bool().unwrap(); let expected = bool_array_from_nullable_vec( vec![ Some(true), @@ -169,11 +169,7 @@ mod test { DType::Primitive(PType::I32, Nullability::Nullable) ); - let flat_ints = sparse_ints - .into_canonical() - .unwrap() - .into_primitive() - .unwrap(); + let flat_ints = sparse_ints.into_primitive().unwrap(); let expected = PrimitiveArray::from_nullable_vec(vec![ Some(0i32), None, diff --git a/vortex-array/src/array/varbin/canonical.rs b/vortex-array/src/array/varbin/canonical.rs index b8245fb11f..0e8c9af895 100644 --- a/vortex-array/src/array/varbin/canonical.rs +++ b/vortex-array/src/array/varbin/canonical.rs @@ -37,7 +37,7 @@ mod test { use crate::array::varbin::builder::VarBinBuilder; use crate::validity::ArrayValidity; - use crate::{ArrayDType, IntoCanonical}; + use crate::{ArrayDType, IntoArrayVariant}; #[rstest] #[case(DType::Utf8(Nullability::Nullable))] @@ -52,7 +52,7 @@ mod test { varbin.push_value("1234567890123".as_bytes()); let varbin = varbin.finish(dtype.clone()); - let canonical = varbin.into_canonical().unwrap().into_varbinview().unwrap(); + let canonical = varbin.into_varbinview().unwrap(); assert_eq!(canonical.dtype(), &dtype); assert!(!canonical.is_valid(0)); diff --git a/vortex-array/src/array/varbinview/accessor.rs b/vortex-array/src/array/varbinview/accessor.rs index c7ce4a4903..52e761dea2 100644 --- a/vortex-array/src/array/varbinview/accessor.rs +++ b/vortex-array/src/array/varbinview/accessor.rs @@ -6,7 +6,7 @@ use crate::array::primitive::PrimitiveArray; use crate::array::varbinview::VarBinViewArray; use crate::array::BinaryView; use crate::validity::ArrayValidity; -use crate::IntoCanonical; +use crate::IntoArrayVariant; impl ArrayAccessor<[u8]> for VarBinViewArray { fn with_iterator FnOnce(&mut dyn Iterator>) -> R, R>( @@ -14,7 +14,7 @@ impl ArrayAccessor<[u8]> for VarBinViewArray { f: F, ) -> VortexResult { let bytes: Vec = (0..self.metadata().buffer_lens.len()) - .map(|i| self.buffer(i).into_canonical()?.into_primitive()) + .map(|i| self.buffer(i).into_primitive()) .try_collect()?; let bytes_slices: Vec<&[u8]> = bytes.iter().map(|b| b.maybe_null_slice::()).collect(); let views: Vec = self.binary_views()?.collect(); diff --git a/vortex-array/src/arrow/record_batch.rs b/vortex-array/src/arrow/record_batch.rs index 25332b127f..860232b8a9 100644 --- a/vortex-array/src/arrow/record_batch.rs +++ b/vortex-array/src/arrow/record_batch.rs @@ -49,7 +49,7 @@ impl TryFrom for RecordBatch { type Error = VortexError; fn try_from(value: StructArray) -> VortexResult { - let array_ref = value.into_canonical()?.into_arrow()?; + let array_ref = value.into_arrow()?; let struct_array = as_struct_array(array_ref.as_ref()); Ok(Self::from(struct_array)) } diff --git a/vortex-array/src/canonical.rs b/vortex-array/src/canonical.rs index 8c6fddf98c..c2bb116724 100644 --- a/vortex-array/src/canonical.rs +++ b/vortex-array/src/canonical.rs @@ -198,9 +198,8 @@ fn struct_to_arrow(struct_array: StructArray) -> VortexResult { .iter() .zip(struct_array.children()) .map(|(name, f)| { - f.into_canonical() + f.into_arrow() .map_err(|err| err.with_context(format!("Failed to canonicalize field {}", name))) - .and_then(|c| c.into_arrow()) }) .collect::>>()?; diff --git a/vortex-array/src/compute/binary_numeric.rs b/vortex-array/src/compute/binary_numeric.rs index e0a1889d06..b5386c4f2d 100644 --- a/vortex-array/src/compute/binary_numeric.rs +++ b/vortex-array/src/compute/binary_numeric.rs @@ -199,14 +199,12 @@ mod test { use crate::array::PrimitiveArray; use crate::compute::{scalar_at, sub_scalar}; - use crate::{ArrayLen as _, IntoArrayData, IntoCanonical}; + use crate::{ArrayLen as _, IntoArrayData, IntoArrayVariant, IntoCanonical}; #[test] fn test_scalar_subtract_unsigned() { let values = vec![1u16, 2, 3].into_array(); let results = sub_scalar(&values, 1u16.into()) - .unwrap() - .into_canonical() .unwrap() .into_primitive() .unwrap() @@ -219,8 +217,6 @@ mod test { fn test_scalar_subtract_signed() { let values = vec![1i64, 2, 3].into_array(); let results = sub_scalar(&values, (-1i64).into()) - .unwrap() - .into_canonical() .unwrap() .into_primitive() .unwrap() @@ -234,8 +230,6 @@ mod test { let values = PrimitiveArray::from_nullable_vec(vec![Some(1u16), Some(2), None, Some(3)]) .into_array(); let result = sub_scalar(&values, Some(1u16).into()) - .unwrap() - .into_canonical() .unwrap() .into_primitive() .unwrap(); @@ -259,8 +253,6 @@ mod test { let values = vec![1.0f64, 2.0, 3.0].into_array(); let to_subtract = -1f64; let results = sub_scalar(&values, to_subtract.into()) - .unwrap() - .into_canonical() .unwrap() .into_primitive() .unwrap()