Skip to content

Commit

Permalink
test(11367): write test to demonstrate issue 11367
Browse files Browse the repository at this point in the history
  • Loading branch information
wiedld committed Jul 16, 2024
1 parent e693d43 commit a484974
Showing 1 changed file with 140 additions and 0 deletions.
140 changes: 140 additions & 0 deletions datafusion/common/src/file_options/parquet_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -481,4 +481,144 @@ mod tests {
"the writer_props should have the same configuration as the session's TableParquetOptions",
);
}

#[test]
fn test_defaults_match() {
// ensure the global settings are the same
let default_table_writer_opts = TableParquetOptions::default();
let default_parquet_opts = ParquetOptions::default();
assert_eq!(
default_table_writer_opts.global,
default_parquet_opts,
"should have matching defaults for TableParquetOptions.global and ParquetOptions",
);

// confirm the TableParquetOptions::default matches the WriterProperties::default, once both converted to WriterProperties
let default_writer_props = WriterProperties::new();
let from_datafusion_defaults =
WriterPropertiesBuilder::try_from(&default_table_writer_opts)
.unwrap()
.build();

// Expected: how the defaults should not match
assert_ne!(
default_writer_props.created_by(),
from_datafusion_defaults.created_by(),
"should have different created_by sources",
);
assert!(
default_writer_props.created_by().starts_with("parquet-rs version"),
"should indicate that writer_props defaults came from the extern parquet crate",
);
assert!(
default_table_writer_opts
.global
.created_by
.starts_with("datafusion version"),
"should indicate that table_parquet_opts defaults came from datafusion",
);

// TODO: in a followup PR, start the discussion of which of these defaults should be changed,
// and if the change should be in datafusion's default or the extern parquet's default.
// refer to https://github.com/apache/datafusion/issues/11367

// TODO: does not match
assert_eq!(
default_writer_props.compression(&"default".into()),
Compression::UNCOMPRESSED,
"extern parquet's default is None"
);
assert!(
matches!(
from_datafusion_defaults.compression(&"foo".into()),
Compression::ZSTD(_)
),
"datafusion's default is zstd"
);

// TODO: does not match
assert_eq!(
default_writer_props.data_page_row_count_limit(),
20_000,
"extern parquet's default data_page_row_count_limit is 20_000"
);
assert_eq!(
from_datafusion_defaults.data_page_row_count_limit(),
usize::MAX,
"datafusion's default is usize::MAX"
);

// TODO: does not match
assert_eq!(
default_writer_props.column_index_truncate_length(),
Some(64),
"extern parquet's default is 64"
);
assert_eq!(
from_datafusion_defaults.column_index_truncate_length(),
None,
"datafusion's default is None"
);

// TODO: does not match
assert!(
default_writer_props.dictionary_enabled(&"default".into()),
"extern parquet's default is true"
);
assert!(
!from_datafusion_defaults.dictionary_enabled(&"foo".into()),
"datafusion's default is false"
);

// TODO: matches once create WriterProps, but only due to parquet's override
assert_eq!(
default_writer_props.statistics_enabled(&"default".into()),
EnabledStatistics::Page,
"extern parquet's default is page"
);
assert_eq!(
default_table_writer_opts.global.statistics_enabled, None,
"datafusion's has no default"
);
assert_eq!(
from_datafusion_defaults.statistics_enabled(&"foo".into()),
EnabledStatistics::Page,
"should see the extern parquet's default over-riding datafusion's None",
);

// TODO: matches once create WriterProps, but only due to parquet's override
assert_eq!(
default_writer_props.max_statistics_size(&"default".into()),
4096,
"extern parquet's default is 4096"
);
assert_eq!(
default_table_writer_opts.global.max_statistics_size, None,
"datafusion's has no default"
);
assert_eq!(
default_writer_props.max_statistics_size(&"default".into()),
4096,
"should see the extern parquet's default over-riding datafusion's None",
);

// TODO: temporarily set the extern parquet's defaults back to the datafusion defaults.
let mut from_extern_parquet =
session_config_from_writer_props(&default_writer_props);
from_extern_parquet.global.compression = Some("zstd(3)".into());
from_extern_parquet.global.data_page_row_count_limit = usize::MAX;
from_extern_parquet.global.column_index_truncate_length = None;
from_extern_parquet.global.dictionary_enabled = None;
from_extern_parquet.global.statistics_enabled = None;
from_extern_parquet.global.max_statistics_size = None;

// Expected: the remaining should match
let same_created_by = default_table_writer_opts.global.created_by.clone(); // we expect these to be different
from_extern_parquet.global.created_by = same_created_by; // we expect these to be different
assert_eq!(
default_table_writer_opts,
from_extern_parquet,
"the writer_props should have the same configuration as the session's TableParquetOptions",
);
}
}

0 comments on commit a484974

Please sign in to comment.