Skip to content

Commit

Permalink
RowMask uses ConstantArray for all valid and all invalid selections and
Browse files Browse the repository at this point in the history
remove unsafe RowMask constructor
  • Loading branch information
robert3005 committed Nov 27, 2024
1 parent e452cbf commit f7ab855
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 52 deletions.
2 changes: 1 addition & 1 deletion vortex-file/src/read/layouts/chunked.rs
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ impl ChunkedLayoutReader {
.zip(in_progress_range)
.filter(|(_, cr)| !cr.finished())
{
let layout_selection = mask.slice(*begin, *end).shift(*begin)?;
let layout_selection = mask.slice(*begin, *end)?.shift(*begin)?;
if let Some(rr) = layout.read_selection(&layout_selection)? {
match rr {
BatchRead::ReadMore(m) => {
Expand Down
76 changes: 29 additions & 47 deletions vortex-file/src/read/mask.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
use std::cmp::{max, min};
use std::fmt::{Display, Formatter};

use arrow_buffer::BooleanBuffer;
use vortex_array::array::{BoolArray, PrimitiveArray, SparseArray};
use vortex_array::array::{BoolArray, ConstantArray, PrimitiveArray, SparseArray};
use vortex_array::compute::{and, filter, slice, take, try_cast, FilterMask, TakeOptions};
use vortex_array::stats::ArrayStatistics;
use vortex_array::validity::{ArrayValidity, LogicalValidity};
use vortex_array::{ArrayDType, ArrayData, IntoArrayData, IntoArrayVariant};
use vortex_dtype::Nullability::NonNullable;
use vortex_dtype::{DType, PType};
use vortex_error::{vortex_bail, VortexExpect, VortexResult};
use vortex_error::{vortex_bail, VortexExpect, VortexResult, VortexUnwrap};

const PREFER_TAKE_TO_FILTER_DENSITY: f64 = 1.0 / 1024.0;

Expand Down Expand Up @@ -73,37 +72,22 @@ impl RowMask {

/// Construct a RowMask which is valid in the given range.
pub fn new_valid_between(begin: usize, end: usize) -> Self {
unsafe {
RowMask::new_unchecked(
BoolArray::from(BooleanBuffer::new_set(end - begin)).into_array(),
begin,
end,
)
}
RowMask::try_new(
ConstantArray::new(true, end - begin).into_array(),
begin,
end,
)
.vortex_unwrap()
}

/// Construct a RowMask which is invalid everywhere in the given range.
pub fn new_invalid_between(begin: usize, end: usize) -> Self {
unsafe {
RowMask::new_unchecked(
BoolArray::from(BooleanBuffer::new_unset(end - begin)).into_array(),
begin,
end,
)
}
}

/// Construct a RowMask from given bitmask, begin and end.
///
/// # Safety
///
/// The bitmask must be of a nonnullable bool array and length of end - begin
pub unsafe fn new_unchecked(bitmask: ArrayData, begin: usize, end: usize) -> Self {
Self {
bitmask,
RowMask::try_new(
ConstantArray::new(false, end - begin).into_array(),
begin,
end,
}
)
.vortex_unwrap()
}

/// Construct a RowMask from a Boolean typed array.
Expand Down Expand Up @@ -185,25 +169,22 @@ impl RowMask {
}

/// Limit mask to [begin..end) range
pub fn slice(&self, begin: usize, end: usize) -> Self {
pub fn slice(&self, begin: usize, end: usize) -> VortexResult<Self> {
let range_begin = max(self.begin, begin);
let range_end = min(self.end, end);
unsafe {
RowMask::new_unchecked(
if range_begin == self.begin && range_end == self.end {
self.bitmask.clone()
} else {
slice(
&self.bitmask,
range_begin - self.begin,
range_end - self.begin,
)
.vortex_expect("Must be a valid slice")
},
range_begin,
range_end,
)
}
RowMask::try_new(
if range_begin == self.begin && range_end == self.end {
self.bitmask.clone()
} else {
slice(
&self.bitmask,
range_begin - self.begin,
range_end - self.begin,
)?
},
range_begin,
range_end,
)
}

/// Filter array with this `RowMask`.
Expand Down Expand Up @@ -262,7 +243,7 @@ impl RowMask {
self.begin
)
}
Ok(unsafe { RowMask::new_unchecked(self.bitmask, self.begin - offset, self.end - offset) })
RowMask::try_new(self.bitmask, self.begin - offset, self.end - offset)
}
}

Expand All @@ -273,6 +254,7 @@ mod tests {
use vortex_array::array::{BoolArray, PrimitiveArray};
use vortex_array::{IntoArrayData, IntoArrayVariant};
use vortex_dtype::Nullability;
use vortex_error::VortexUnwrap;

use crate::read::mask::RowMask;

Expand All @@ -299,7 +281,7 @@ mod tests {
RowMask::try_new(BoolArray::from_iter([false, true]).into_array(), 3, 5).unwrap())]
#[cfg_attr(miri, ignore)]
fn slice(#[case] first: RowMask, #[case] range: (usize, usize), #[case] expected: RowMask) {
assert_eq!(first.slice(range.0, range.1), expected);
assert_eq!(first.slice(range.0, range.1).vortex_unwrap(), expected);
}

#[test]
Expand Down
14 changes: 10 additions & 4 deletions vortex-file/src/read/splits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,12 @@ impl PruningSplitIterator {
};

// we masked everything out, so move on
if row_mask.slice(begin, end).is_empty() {
let sliced = row_mask.slice(begin, end)?;
if sliced.is_empty() {
continue;
}

return self.read_mask(row_mask.slice(begin, end));
return self.read_mask(sliced);
}

Ok(None)
Expand Down Expand Up @@ -197,10 +198,15 @@ impl Iterator for FixedSplitIterator {
// Find next range that's not filtered out by supplied row_mask
for (begin, end) in ranges {
return if let Some(ref row_mask) = self.row_mask {
if row_mask.slice(begin, end).is_empty() {
let sliced = match row_mask.slice(begin, end) {
Ok(s) => s,
Err(e) => return Some(Err(e)),
};

if sliced.is_empty() {
continue;
}
Some(Ok(SplitMask::Mask(row_mask.slice(begin, end))))
Some(Ok(SplitMask::Mask(sliced)))
} else {
Some(Ok(SplitMask::Mask(RowMask::new_valid_between(begin, end))))
};
Expand Down

0 comments on commit f7ab855

Please sign in to comment.