-
Notifications
You must be signed in to change notification settings - Fork 32
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Layout metadata reader and column statistics (#1455)
Adds a dedicated file metadata reader and uses it to provide DataFusion with file-level statistics. --------- Co-authored-by: Will Manning <[email protected]> Co-authored-by: Daniel King <[email protected]>
- Loading branch information
1 parent
866d892
commit 630835b
Showing
16 changed files
with
486 additions
and
40 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,3 +2,4 @@ pub mod config; | |
pub mod execution; | ||
pub mod format; | ||
pub mod opener; | ||
pub mod statistics; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
use arrow_array::cast::AsArray; | ||
use arrow_array::types::UInt64Type; | ||
use datafusion::functions_aggregate::min_max::{MaxAccumulator, MinAccumulator}; | ||
use datafusion_common::stats::Precision; | ||
use datafusion_common::ColumnStatistics; | ||
use datafusion_expr::Accumulator; | ||
use vortex_array::array::StructArray; | ||
use vortex_array::variants::StructArrayTrait as _; | ||
use vortex_array::IntoCanonical; | ||
use vortex_error::VortexResult; | ||
|
||
pub fn array_to_col_statistics(array: StructArray) -> VortexResult<ColumnStatistics> { | ||
let mut stats = ColumnStatistics::new_unknown(); | ||
|
||
if let Some(null_count_array) = array.field_by_name("null_count") { | ||
let array = null_count_array.into_canonical()?.into_arrow()?; | ||
let array = array.as_primitive::<UInt64Type>(); | ||
|
||
let null_count = array.iter().map(|v| v.unwrap_or_default()).sum::<u64>(); | ||
stats.null_count = Precision::Exact(null_count as usize); | ||
} | ||
|
||
if let Some(max_value_array) = array.field_by_name("max") { | ||
let array = max_value_array.into_canonical()?.into_arrow()?; | ||
let mut acc = MaxAccumulator::try_new(array.data_type())?; | ||
acc.update_batch(&[array])?; | ||
|
||
let max_val = acc.evaluate()?; | ||
stats.max_value = Precision::Exact(max_val) | ||
} | ||
|
||
if let Some(min_value_array) = array.field_by_name("min") { | ||
let array = min_value_array.into_canonical()?.into_arrow()?; | ||
let mut acc = MinAccumulator::try_new(array.data_type())?; | ||
acc.update_batch(&[array])?; | ||
|
||
let max_val = acc.evaluate()?; | ||
stats.min_value = Precision::Exact(max_val) | ||
} | ||
|
||
Ok(stats) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.