Skip to content

Commit

Permalink
update tests
Browse files Browse the repository at this point in the history
  • Loading branch information
XiangpengHao committed Aug 7, 2024
1 parent 297b879 commit 1732978
Show file tree
Hide file tree
Showing 14 changed files with 69 additions and 39 deletions.
17 changes: 15 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# under the License.

[workspace]
exclude = ["datafusion-cli", "dev/depcheck"]
exclude = ["datafusion-cli", "dev/depcheck", "datafusion-examples"]
members = [
"datafusion/common",
"datafusion/common-runtime",
Expand All @@ -40,7 +40,6 @@ members = [
"datafusion/sqllogictest",
"datafusion/substrait",
"datafusion/wasmtest",
"datafusion-examples",
"docs",
"test-utils",
"benchmarks",
Expand Down Expand Up @@ -158,3 +157,17 @@ large_futures = "warn"
[workspace.lints.rust]
unexpected_cfgs = { level = "warn", check-cfg = ["cfg(tarpaulin)"] }
unused_imports = "deny"

[patch.crates-io]
arrow = { git = "https://github.com/apache/arrow-rs.git" }
arrow-array = { git = "https://github.com/apache/arrow-rs.git" }
arrow-buffer = { git = "https://github.com/apache/arrow-rs.git" }
arrow-cast = { git = "https://github.com/apache/arrow-rs.git" }
arrow-data = { git = "https://github.com/apache/arrow-rs.git" }
arrow-ipc = { git = "https://github.com/apache/arrow-rs.git" }
arrow-schema = { git = "https://github.com/apache/arrow-rs.git" }
arrow-select = { git = "https://github.com/apache/arrow-rs.git" }
arrow-string = { git = "https://github.com/apache/arrow-rs.git" }
arrow-ord = { git = "https://github.com/apache/arrow-rs.git" }
arrow-flight = { git = "https://github.com/apache/arrow-rs.git" }
parquet = { git = "https://github.com/apache/arrow-rs.git" }
14 changes: 14 additions & 0 deletions benchmarks/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,17 @@ tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot"] }

[dev-dependencies]
datafusion-proto = { workspace = true }

[patch.crates-io]
arrow = { git = "https://github.com/apache/arrow-rs.git" }
arrow-array = { git = "https://github.com/apache/arrow-rs.git" }
arrow-buffer = { git = "https://github.com/apache/arrow-rs.git" }
arrow-cast = { git = "https://github.com/apache/arrow-rs.git" }
arrow-data = { git = "https://github.com/apache/arrow-rs.git" }
arrow-ipc = { git = "https://github.com/apache/arrow-rs.git" }
arrow-schema = { git = "https://github.com/apache/arrow-rs.git" }
arrow-select = { git = "https://github.com/apache/arrow-rs.git" }
arrow-string = { git = "https://github.com/apache/arrow-rs.git" }
arrow-ord = { git = "https://github.com/apache/arrow-rs.git" }
arrow-flight = { git = "https://github.com/apache/arrow-rs.git" }
parquet = { git = "https://github.com/apache/arrow-rs.git" }
2 changes: 1 addition & 1 deletion datafusion/common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ libc = "0.2.140"
num_cpus = { workspace = true }
object_store = { workspace = true, optional = true }
parquet = { workspace = true, optional = true, default-features = true }
pyo3 = { version = "0.21.0", optional = true }
pyo3 = { version = "0.22.0", optional = true }
sqlparser = { workspace = true }

[target.'cfg(target_family = "wasm")'.dependencies]
Expand Down
2 changes: 1 addition & 1 deletion datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,7 @@ config_namespace! {

/// (reading) If true, parquet reader will read columns of `Utf8/Utf8Large` with `Utf8View`,
/// and `Binary/BinaryLarge` with `BinaryView`.
pub schema_force_string_view: bool, default = false
pub schema_force_string_view: bool, default = true
}
}

Expand Down
6 changes: 3 additions & 3 deletions datafusion/common/src/scalar/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4329,7 +4329,7 @@ mod tests {
.strip_backtrace();
assert_eq!(
err,
"Arrow error: Compute error: Overflow happened on: 2147483647 - -2147483648"
"Arrow error: Arithmetic overflow: Overflow happened on: 2147483647 - -2147483648"
)
}

Expand All @@ -4350,7 +4350,7 @@ mod tests {
.sub_checked(&int_value_2)
.unwrap_err()
.strip_backtrace();
assert_eq!(err, "Arrow error: Compute error: Overflow happened on: 9223372036854775807 - -9223372036854775808")
assert_eq!(err, "Arrow error: Arithmetic overflow: Overflow happened on: 9223372036854775807 - -9223372036854775808")
}

#[test]
Expand Down Expand Up @@ -5866,7 +5866,7 @@ mod tests {
let root_err = err.find_root();
match root_err{
DataFusionError::ArrowError(
ArrowError::ComputeError(_),
ArrowError::ArithmeticOverflow(_),
_,
) => {}
_ => return Err(err),
Expand Down
10 changes: 5 additions & 5 deletions datafusion/core/src/datasource/file_format/parquet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1248,7 +1248,7 @@ mod tests {
use arrow_schema::{DataType, Field};
use async_trait::async_trait;
use datafusion_common::cast::{
as_binary_array, as_boolean_array, as_float32_array, as_float64_array,
as_binary_view_array, as_boolean_array, as_float32_array, as_float64_array,
as_int32_array, as_timestamp_nanosecond_array,
};
use datafusion_common::config::ParquetOptions;
Expand Down Expand Up @@ -1799,8 +1799,8 @@ mod tests {
bigint_col: Int64\n\
float_col: Float32\n\
double_col: Float64\n\
date_string_col: Binary\n\
string_col: Binary\n\
date_string_col: BinaryView\n\
string_col: BinaryView\n\
timestamp_col: Timestamp(Nanosecond, None)",
y
);
Expand Down Expand Up @@ -1956,7 +1956,7 @@ mod tests {
assert_eq!(1, batches[0].num_columns());
assert_eq!(8, batches[0].num_rows());

let array = as_binary_array(batches[0].column(0))?;
let array = as_binary_view_array(batches[0].column(0))?;
let mut values: Vec<&str> = vec![];
for i in 0..batches[0].num_rows() {
values.push(std::str::from_utf8(array.value(i)).unwrap());
Expand Down Expand Up @@ -2070,7 +2070,7 @@ mod tests {
let int_col_offset = offset_index.get(4).unwrap();

// 325 pages in int_col
assert_eq!(int_col_offset.len(), 325);
assert_eq!(int_col_offset.page_locations().len(), 325);
match int_col_index {
Index::INT32(index) => {
assert_eq!(index.indexes.len(), 325);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,7 @@ impl<'a> PagesPruningStatistics<'a> {
converter,
column_index,
offset_index,
page_offsets,
page_offsets: &page_offsets.page_locations,
})
}

Expand Down
2 changes: 1 addition & 1 deletion datafusion/functions/src/regex/regexpreplace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ fn _regexp_replace_static_pattern_replace<T: OffsetSizeTrait>(
let string_view_array = as_string_view_array(&args[0])?;

let mut builder = StringViewBuilder::with_capacity(string_view_array.len())
.with_block_size(1024 * 1024 * 2);
.with_fixed_block_size(1024 * 1024 * 2);

for val in string_view_array.iter() {
if let Some(val) = val {
Expand Down
2 changes: 1 addition & 1 deletion datafusion/physical-expr-common/src/binary_view_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ where
output_type,
map: hashbrown::raw::RawTable::with_capacity(INITIAL_MAP_CAPACITY),
map_size: 0,
builder: GenericByteViewBuilder::new().with_block_size(2 * 1024 * 1024),
builder: GenericByteViewBuilder::new().with_fixed_block_size(2 * 1024 * 1024),
random_state: RandomState::new(),
hashes_buffer: vec![],
null: None,
Expand Down
5 changes: 3 additions & 2 deletions datafusion/physical-plan/src/coalesce_batches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -494,7 +494,7 @@ fn gc_string_view_batch(batch: &RecordBatch) -> RecordBatch {
// See https://github.com/apache/arrow-rs/issues/6094 for more details.
let mut builder = StringViewBuilder::with_capacity(s.len());
if ideal_buffer_size > 0 {
builder = builder.with_block_size(ideal_buffer_size as u32);
builder = builder.with_fixed_block_size(ideal_buffer_size as u32);
}

for v in s.iter() {
Expand Down Expand Up @@ -804,7 +804,8 @@ mod tests {
impl StringViewTest {
/// Create a `StringViewArray` with the parameters specified in this struct
fn build(self) -> StringViewArray {
let mut builder = StringViewBuilder::with_capacity(100).with_block_size(8192);
let mut builder =
StringViewBuilder::with_capacity(100).with_fixed_block_size(8192);
loop {
for &v in self.strings.iter() {
builder.append_option(v);
Expand Down
18 changes: 9 additions & 9 deletions datafusion/sql/src/unparser/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2077,49 +2077,49 @@ mod tests {
"1 YEAR 1 MONTH 1 DAY 3 HOUR 10 MINUTE 20 SECOND",
),
IntervalStyle::PostgresVerbose,
r#"INTERVAL '0 YEARS 13 MONS 1 DAYS 3 HOURS 10 MINS 20.000000000 SECS'"#,
r#"INTERVAL '13 MONS 1 DAYS 3 HOURS 10 MINS 20.000000000 SECS'"#,
),
(
interval_month_day_nano_lit("1.5 MONTH"),
IntervalStyle::PostgresVerbose,
r#"INTERVAL '0 YEARS 1 MONS 15 DAYS 0 HOURS 0 MINS 0.000000000 SECS'"#,
r#"INTERVAL '1 MONS 15 DAYS'"#,
),
(
interval_month_day_nano_lit("-3 MONTH"),
IntervalStyle::PostgresVerbose,
r#"INTERVAL '0 YEARS -3 MONS 0 DAYS 0 HOURS 0 MINS 0.000000000 SECS'"#,
r#"INTERVAL '-3 MONS'"#,
),
(
interval_month_day_nano_lit("1 MONTH")
.add(interval_month_day_nano_lit("1 DAY")),
IntervalStyle::PostgresVerbose,
r#"(INTERVAL '0 YEARS 1 MONS 0 DAYS 0 HOURS 0 MINS 0.000000000 SECS' + INTERVAL '0 YEARS 0 MONS 1 DAYS 0 HOURS 0 MINS 0.000000000 SECS')"#,
r#"(INTERVAL '1 MONS' + INTERVAL '1 DAYS')"#,
),
(
interval_month_day_nano_lit("1 MONTH")
.sub(interval_month_day_nano_lit("1 DAY")),
IntervalStyle::PostgresVerbose,
r#"(INTERVAL '0 YEARS 1 MONS 0 DAYS 0 HOURS 0 MINS 0.000000000 SECS' - INTERVAL '0 YEARS 0 MONS 1 DAYS 0 HOURS 0 MINS 0.000000000 SECS')"#,
r#"(INTERVAL '1 MONS' - INTERVAL '1 DAYS')"#,
),
(
interval_datetime_lit("10 DAY 1 HOUR 10 MINUTE 20 SECOND"),
IntervalStyle::PostgresVerbose,
r#"INTERVAL '0 YEARS 0 MONS 10 DAYS 1 HOURS 10 MINS 20.000 SECS'"#,
r#"INTERVAL '10 DAYS 1 HOURS 10 MINS 20.000 SECS'"#,
),
(
interval_datetime_lit("10 DAY 1.5 HOUR 10 MINUTE 20 SECOND"),
IntervalStyle::PostgresVerbose,
r#"INTERVAL '0 YEARS 0 MONS 10 DAYS 1 HOURS 40 MINS 20.000 SECS'"#,
r#"INTERVAL '10 DAYS 1 HOURS 40 MINS 20.000 SECS'"#,
),
(
interval_year_month_lit("1 YEAR 1 MONTH"),
IntervalStyle::PostgresVerbose,
r#"INTERVAL '1 YEARS 1 MONS 0 DAYS 0 HOURS 0 MINS 0.00 SECS'"#,
r#"INTERVAL '1 YEARS 1 MONS'"#,
),
(
interval_year_month_lit("1.5 YEAR 1 MONTH"),
IntervalStyle::PostgresVerbose,
r#"INTERVAL '1 YEARS 7 MONS 0 DAYS 0 HOURS 0 MINS 0.00 SECS'"#,
r#"INTERVAL '1 YEARS 7 MONS'"#,
),
(
interval_year_month_lit("1 YEAR 1 MONTH"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,9 @@ pub(crate) fn convert_schema_to_types(columns: &Fields) -> Vec<DFColumnType> {
| DataType::Float64
| DataType::Decimal128(_, _)
| DataType::Decimal256(_, _) => DFColumnType::Float,
DataType::Utf8 | DataType::LargeUtf8 => DFColumnType::Text,
DataType::Utf8 | DataType::Utf8View | DataType::LargeUtf8 => {
DFColumnType::Text
}
DataType::Date32
| DataType::Date64
| DataType::Time32(_)
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/arrow_typeof.slt
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ select arrow_cast([1, 2, 3], 'FixedSizeList(3, Int64)');
[1, 2, 3]

# Tests for Utf8View
query ?T
query TT
select arrow_cast('MyAwesomeString', 'Utf8View'), arrow_typeof(arrow_cast('MyAwesomeString', 'Utf8View'))
----
MyAwesomeString Utf8View
Expand Down
22 changes: 11 additions & 11 deletions datafusion/sqllogictest/test_files/math.slt
Original file line number Diff line number Diff line change
Expand Up @@ -252,19 +252,19 @@ select abs(c1), abs(c2), abs(c3), abs(c4) from test_nullable_integer where datas
NULL NULL NULL NULL

# abs: Int8 overflow
statement error DataFusion error: Arrow error: Arithmetic overflow: Int8Array overflow on abs\(-128\)
statement error DataFusion error: Arrow error: Compute error: Int8Array overflow on abs\(-128\)
select abs(c1) from test_nullable_integer where dataset = 'mins'

# abs: Int16 overflow
statement error DataFusion error: Arrow error: Arithmetic overflow: Int16Array overflow on abs\(-32768\)
statement error DataFusion error: Arrow error: Compute error: Int16Array overflow on abs\(-32768\)
select abs(c2) from test_nullable_integer where dataset = 'mins'

# abs: Int32 overflow
statement error DataFusion error: Arrow error: Arithmetic overflow: Int32Array overflow on abs\(-2147483648\)
statement error DataFusion error: Arrow error: Compute error: Int32Array overflow on abs\(-2147483648\)
select abs(c3) from test_nullable_integer where dataset = 'mins'

# abs: Int64 overflow
statement error DataFusion error: Arrow error: Arithmetic overflow: Int64Array overflow on abs\(-9223372036854775808\)
statement error DataFusion error: Arrow error: Compute error: Int64Array overflow on abs\(-9223372036854775808\)
select abs(c4) from test_nullable_integer where dataset = 'mins'

statement ok
Expand Down Expand Up @@ -620,15 +620,15 @@ select gcd(a, b), gcd(c*d + 1, abs(e)) + f from signed_integers;
NULL NULL

# gcd(i64::MIN, i64::MIN)
query error DataFusion error: Arrow error: Arithmetic overflow:Signed integer overflow in GCD\(\-9223372036854775808, \-9223372036854775808\)
query error DataFusion error: Arrow error: Compute error: Signed integer overflow in GCD\(\-9223372036854775808, \-9223372036854775808\)
select gcd(-9223372036854775808, -9223372036854775808);

# gcd(i64::MIN, 0)
query error DataFusion error: Arrow error: Arithmetic overflow:Signed integer overflow in GCD\(\-9223372036854775808, 0\)
query error DataFusion error: Arrow error: Compute error: Signed integer overflow in GCD\(\-9223372036854775808, 0\)
select gcd(-9223372036854775808, 0);

# gcd(0, i64::MIN)
query error DataFusion error: Arrow error: Arithmetic overflow:Signed integer overflow in GCD\(0, \-9223372036854775808\)
query error DataFusion error: Arrow error: Compute error: Signed integer overflow in GCD\(0, \-9223372036854775808\)
select gcd(0, -9223372036854775808);


Expand Down Expand Up @@ -662,22 +662,22 @@ select lcm(a, b), lcm(c, d), lcm(e, f) from signed_integers;
NULL NULL NULL

# Result cannot fit in i64
query error DataFusion error: Arrow error: Arithmetic overflow:Signed integer overflow in LCM\(\-9223372036854775808, \-9223372036854775808\)
query error DataFusion error: Arrow error: Compute error: Signed integer overflow in LCM\(\-9223372036854775808, \-9223372036854775808\)
select lcm(-9223372036854775808, -9223372036854775808);

query error DataFusion error: Arrow error: Arithmetic overflow:Signed integer overflow in LCM\(1, \-9223372036854775808\)
query error DataFusion error: Arrow error: Compute error: Signed integer overflow in LCM\(1, \-9223372036854775808\)
select lcm(1, -9223372036854775808);

# Overflow on multiplication
query error DataFusion error: Arrow error: Arithmetic overflow:Signed integer overflow in LCM\(2, 9223372036854775803\)
query error DataFusion error: Arrow error: Compute error: Signed integer overflow in LCM\(2, 9223372036854775803\)
select lcm(2, 9223372036854775803);


query error DataFusion error: Arrow error: Arithmetic overflow: Overflow happened on: 2107754225 \^ 1221660777
select power(2107754225, 1221660777);

# factorial overflow
query error DataFusion error: Arrow error: Arithmetic overflow: Overflow happened on FACTORIAL\(350943270\)
query error DataFusion error: Arrow error: Compute error: Overflow happened on FACTORIAL\(350943270\)
select FACTORIAL(350943270);

statement ok
Expand Down

0 comments on commit 1732978

Please sign in to comment.