From 6e9a0360f6fc614cc07421201cb2a070dbc84fe8 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 18 Aug 2024 08:39:40 -0400 Subject: [PATCH] refine docs --- datafusion/physical-plan/src/coalesce/mod.rs | 38 ++++++++++++++------ 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/datafusion/physical-plan/src/coalesce/mod.rs b/datafusion/physical-plan/src/coalesce/mod.rs index d5b68560fdf95..5befa5ecda99b 100644 --- a/datafusion/physical-plan/src/coalesce/mod.rs +++ b/datafusion/physical-plan/src/coalesce/mod.rs @@ -111,8 +111,8 @@ impl BatchCoalescer { Arc::clone(&self.schema) } - /// Given a batch, it updates the buffer of [`BatchCoalescer`]. It returns - /// a variant of [`CoalescerState`] indicating the final state of the buffer. + /// Push next batch, and returns [`CoalescerState`] indicating the current + /// state of the buffer. pub fn push_batch(&mut self, batch: RecordBatch) -> CoalescerState { let batch = gc_string_view_batch(&batch); if self.limit_reached(&batch) { @@ -129,9 +129,13 @@ impl BatchCoalescer { self.buffer.is_empty() } - /// The function checks if the buffer can reach the specified limit after getting `batch`. - /// If it does, it slices the received batch as needed, updates the buffer with it, and - /// finally returns `true`. Otherwise; the function does nothing and returns `false`. + /// Checks if the buffer will reach the specified limit after getting + /// `batch`. + /// + /// If fetch would be exceeded, slices the received batch, updates the + /// buffer with it, and returns `true`. + /// + /// Otherwise: does nothing and returns `false`. fn limit_reached(&mut self, batch: &RecordBatch) -> bool { match self.fetch { Some(fetch) if self.total_rows + batch.num_rows() >= fetch => { @@ -149,8 +153,10 @@ impl BatchCoalescer { } } - /// Updates the buffer with the given batch. If the target batch size is reached, - /// the function returns `true`. Otherwise, it returns `false`. + /// Updates the buffer with the given batch. + /// + /// If the target batch size is reached, returns `true`. Otherwise, returns + /// `false`. fn target_reached(&mut self, batch: RecordBatch) -> bool { if batch.num_rows() == 0 { false @@ -171,21 +177,31 @@ impl BatchCoalescer { } } -/// This enumeration acts as a status indicator for the [`BatchCoalescer`] after a +/// Indicates the state of the [`BatchCoalescer`] buffer after the /// [`BatchCoalescer::push_batch()`] operation. +/// +/// The caller should take diferent actions, depending on the variant returned. pub enum CoalescerState { /// Neither the limit nor the target batch size is reached. + /// + /// Action: continue pushing batches. Continue, - /// The sufficient row count to produce a complete query result is reached. + /// The limit has been reached. + /// + /// Action: call [`BatchCoalescer::finish_batch()`] to get the final + /// buffered results as a batch and finish the query. LimitReached, /// The specified minimum number of rows a batch should have is reached. + /// + /// Action: call [`BatchCoalescer::finish_batch()`] to get the current + /// buffered results as a batch and then continue pushing batches. TargetReached, } /// Heuristically compact `StringViewArray`s to reduce memory usage, if needed /// -/// This function decides when to consolidate the StringView into a new buffer -/// to reduce memory usage and improve string locality for better performance. +/// Decides when to consolidate the StringView into a new buffer to reduce +/// memory usage and improve string locality for better performance. /// /// This differs from `StringViewArray::gc` because: /// 1. It may not compact the array depending on a heuristic.