diff --git a/datafusion/functions/src/strings.rs b/datafusion/functions/src/strings.rs index caafbae6ba5f..a6587a91a9fe 100644 --- a/datafusion/functions/src/strings.rs +++ b/datafusion/functions/src/strings.rs @@ -124,8 +124,12 @@ pub struct StringArrayBuilder { impl StringArrayBuilder { pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self { - let mut offsets_buffer = - MutableBuffer::with_capacity((item_capacity + 1) * size_of::()); + let capacity = item_capacity + .checked_add(1) + .map(|i| i.saturating_mul(size_of::())) + .expect("capacity integer overflow"); + + let mut offsets_buffer = MutableBuffer::with_capacity(capacity); // SAFETY: the first offset value is definitely not going to exceed the bounds. unsafe { offsets_buffer.push_unchecked(0_i32) }; Self { @@ -182,7 +186,7 @@ impl StringArrayBuilder { .len() .try_into() .expect("byte array offset overflow"); - unsafe { self.offsets_buffer.push_unchecked(next_offset) }; + self.offsets_buffer.push(next_offset); } /// Finalise the builder into a concrete [`StringArray`]. @@ -289,8 +293,12 @@ pub struct LargeStringArrayBuilder { impl LargeStringArrayBuilder { pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self { - let mut offsets_buffer = - MutableBuffer::with_capacity((item_capacity + 1) * size_of::()); + let capacity = item_capacity + .checked_add(1) + .map(|i| i.saturating_mul(size_of::())) + .expect("capacity integer overflow"); + + let mut offsets_buffer = MutableBuffer::with_capacity(capacity); // SAFETY: the first offset value is definitely not going to exceed the bounds. unsafe { offsets_buffer.push_unchecked(0_i64) }; Self { @@ -347,7 +355,7 @@ impl LargeStringArrayBuilder { .len() .try_into() .expect("byte array offset overflow"); - unsafe { self.offsets_buffer.push_unchecked(next_offset) }; + self.offsets_buffer.push(next_offset); } /// Finalise the builder into a concrete [`LargeStringArray`]. @@ -452,3 +460,20 @@ impl ColumnarValueRef<'_> { } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + #[should_panic(expected = "capacity integer overflow")] + fn test_overflow_string_array_builder() { + let _builder = StringArrayBuilder::with_capacity(usize::MAX, usize::MAX); + } + + #[test] + #[should_panic(expected = "capacity integer overflow")] + fn test_overflow_large_string_array_builder() { + let _builder = LargeStringArrayBuilder::with_capacity(usize::MAX, usize::MAX); + } +}