diff --git a/src/common/src/hash/key.rs b/src/common/src/hash/key.rs
index 90ac637b0567f..dc028928a3369 100644
--- a/src/common/src/hash/key.rs
+++ b/src/common/src/hash/key.rs
@@ -39,13 +39,15 @@ use crate::array::{
     StructRef,
 };
 use crate::estimate_size::EstimateSize;
-use crate::row::{OwnedRow, RowDeserializer};
+use crate::row::{OwnedRow, Row, RowDeserializer};
 use crate::types::{
     DataType, Date, Decimal, Int256Ref, JsonbRef, ScalarRef, Serial, Time, Timestamp, F32, F64,
 };
 use crate::util::hash_util::{Crc32FastBuilder, XxHash64Builder};
 use crate::util::iter_util::ZipEqFast;
-use crate::util::value_encoding::{deserialize_datum, serialize_datum_into};
+use crate::util::value_encoding::{
+    deserialize_datum, estimate_serialize_datum_size, serialize_datum_into,
+};
 
 /// This is determined by the stack based data structure we use,
 /// `StackNullBitmap`, which can store 64 bits at most.
@@ -911,18 +913,19 @@ impl HashKey for SerializedKey {
         data_chunk: &DataChunk,
         hash_codes: Vec<XxHash64HashCode>,
     ) -> Vec<Self> {
-        let estimated_key_size = data_chunk.estimate_value_encoding_size(column_idxes);
-        // for column_idx in column_idxes {
-        //     data_chunk
-        //         .column_at(*column_idx)
-        //         .array_ref()
-        //         .(&mut serializers[..]);
-        // }
-        //
+        // let estimated_key_size = data_chunk.estimate_value_encoding_size(column_idxes);
+        let estimated_key_sizes = data_chunk
+            .rows()
+            .map(|r| r.iter().map(estimate_serialize_datum_size).sum::<usize>())
+            .collect_vec();
 
+        // Construct serializers for each row.
         let mut serializers: Vec<Self::S> = hash_codes
             .into_iter()
-            .map(|hashcode| Self::S::from_hash_code(hashcode, estimated_key_size))
+            .zip_eq_fast(estimated_key_sizes)
+            .map(|(hashcode, estimated_key_size)| {
+                Self::S::from_hash_code(hashcode, estimated_key_size)
+            })
             .collect();
 
         for column_idx in column_idxes {
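
Below is a minimal, self-contained sketch of the idea behind this change: instead of sizing every key's buffer from one chunk-wide estimate, each row gets its own estimated serialized size, so rows whose values differ widely in width no longer share one over- or under-sized allocation. All names here (`Datum`, `estimate_datum_size`, `KeySerializer`) are simplified stand-ins for illustration, not the actual RisingWave types; the real code uses `estimate_serialize_datum_size`, `zip_eq_fast`, and the hash-key serializer's `from_hash_code`.

```rust
/// Simplified datum: `None` models SQL NULL. Hypothetical stand-in type.
type Datum = Option<i64>;

/// Stand-in for `estimate_serialize_datum_size`: one null-flag byte plus the
/// fixed-width payload when the value is present.
fn estimate_datum_size(datum: &Datum) -> usize {
    1 + datum.map_or(0, |_| std::mem::size_of::<i64>())
}

/// Stand-in for the hash-key serializer: it pre-allocates its buffer from the
/// caller-provided estimate, mirroring what `Self::S::from_hash_code` does
/// with `estimated_key_size`.
struct KeySerializer {
    hash_code: u64,
    buf: Vec<u8>,
}

impl KeySerializer {
    fn from_hash_code(hash_code: u64, estimated_key_size: usize) -> Self {
        Self {
            hash_code,
            buf: Vec::with_capacity(estimated_key_size),
        }
    }
}

fn main() {
    // Two rows with different serialized widths (the second contains a NULL).
    let rows: Vec<Vec<Datum>> = vec![vec![Some(1), Some(2)], vec![Some(3), None]];
    let hash_codes = vec![0xdead_beef_u64, 0xcafe_babe_u64];

    // Per-row estimate: sum the per-datum estimates, as the new code does with
    // `rows().map(|r| r.iter().map(estimate_serialize_datum_size).sum())`.
    let estimated_key_sizes: Vec<usize> = rows
        .iter()
        .map(|r| r.iter().map(estimate_datum_size).sum::<usize>())
        .collect();

    // Pair each hash code with its row's estimate (the diff uses the
    // length-checked `zip_eq_fast`; plain `zip` suffices for the sketch).
    let serializers: Vec<KeySerializer> = hash_codes
        .into_iter()
        .zip(estimated_key_sizes)
        .map(|(hash_code, size)| KeySerializer::from_hash_code(hash_code, size))
        .collect();

    for s in &serializers {
        println!("hash={:#x} capacity={}", s.hash_code, s.buf.capacity());
    }
}
```

The trade-off is an extra O(rows × columns) estimation pass over the chunk in exchange for tighter per-key buffer allocations than the previous single chunk-level estimate could provide.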