From 82a40f3edf264834c28f3a1bf2a3143951802634 Mon Sep 17 00:00:00 2001 From: wiedld <wiedld@users.noreply.github.com> Date: Wed, 18 Dec 2024 03:11:40 -0800 Subject: [PATCH] Handle possible overflows in StringArrayBuilder / LargeStringArrayBuilder (#13802) * test(13796): reproducer of overflow on capacity * fix(13796): handle overflows with proper max capacity number which is valid for MutableBuffer * refactor: use simple solution and provide panic --- datafusion/functions/src/strings.rs | 37 ++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/datafusion/functions/src/strings.rs b/datafusion/functions/src/strings.rs index caafbae6ba5f..a6587a91a9fe 100644 --- a/datafusion/functions/src/strings.rs +++ b/datafusion/functions/src/strings.rs @@ -124,8 +124,12 @@ pub struct StringArrayBuilder { impl StringArrayBuilder { pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self { - let mut offsets_buffer = - MutableBuffer::with_capacity((item_capacity + 1) * size_of::<i32>()); + let capacity = item_capacity + .checked_add(1) + .map(|i| i.saturating_mul(size_of::<i32>())) + .expect("capacity integer overflow"); + + let mut offsets_buffer = MutableBuffer::with_capacity(capacity); // SAFETY: the first offset value is definitely not going to exceed the bounds. unsafe { offsets_buffer.push_unchecked(0_i32) }; Self { @@ -182,7 +186,7 @@ impl StringArrayBuilder { .len() .try_into() .expect("byte array offset overflow"); - unsafe { self.offsets_buffer.push_unchecked(next_offset) }; + self.offsets_buffer.push(next_offset); } /// Finalise the builder into a concrete [`StringArray`]. @@ -289,8 +293,12 @@ pub struct LargeStringArrayBuilder { impl LargeStringArrayBuilder { pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self { - let mut offsets_buffer = - MutableBuffer::with_capacity((item_capacity + 1) * size_of::<i64>()); + let capacity = item_capacity + .checked_add(1) + .map(|i| i.saturating_mul(size_of::<i64>())) + .expect("capacity integer overflow"); + + let mut offsets_buffer = MutableBuffer::with_capacity(capacity); // SAFETY: the first offset value is definitely not going to exceed the bounds. unsafe { offsets_buffer.push_unchecked(0_i64) }; Self { @@ -347,7 +355,7 @@ impl LargeStringArrayBuilder { .len() .try_into() .expect("byte array offset overflow"); - unsafe { self.offsets_buffer.push_unchecked(next_offset) }; + self.offsets_buffer.push(next_offset); } /// Finalise the builder into a concrete [`LargeStringArray`]. @@ -452,3 +460,20 @@ impl ColumnarValueRef<'_> { } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + #[should_panic(expected = "capacity integer overflow")] + fn test_overflow_string_array_builder() { + let _builder = StringArrayBuilder::with_capacity(usize::MAX, usize::MAX); + } + + #[test] + #[should_panic(expected = "capacity integer overflow")] + fn test_overflow_large_string_array_builder() { + let _builder = LargeStringArrayBuilder::with_capacity(usize::MAX, usize::MAX); + } +}