Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(5975/5976): timezone handling for timestamps and date_trunc, date_part and date_bin #7614

Merged
merged 13 commits into from
Sep 23, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions datafusion/expr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ path = "src/lib.rs"
[dependencies]
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
arrow = { workspace = true }
arrow-array = { workspace = true }
datafusion-common = { path = "../common", version = "31.0.0" }
sqlparser = { workspace = true }
strum = { version = "0.25.0", features = ["derive"] }
Expand Down
32 changes: 30 additions & 2 deletions datafusion/expr/src/built_in_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1020,9 +1020,13 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::DateTrunc => Signature::one_of(
vec![
Exact(vec![Utf8, Timestamp(Nanosecond, None)]),
Exact(vec![Utf8, Timestamp(Nanosecond, Some("+TZ".into()))]),
Exact(vec![Utf8, Timestamp(Microsecond, None)]),
Exact(vec![Utf8, Timestamp(Microsecond, Some("+TZ".into()))]),
Exact(vec![Utf8, Timestamp(Millisecond, None)]),
Exact(vec![Utf8, Timestamp(Millisecond, Some("+TZ".into()))]),
Exact(vec![Utf8, Timestamp(Second, None)]),
Exact(vec![Utf8, Timestamp(Second, Some("+TZ".into()))]),
],
self.volatility(),
),
Expand All @@ -1034,16 +1038,37 @@ impl BuiltinScalarFunction {
Timestamp(array_type.clone(), None),
Timestamp(Nanosecond, None),
]),
Exact(vec![
Interval(MonthDayNano),
Timestamp(array_type.clone(), Some("+TZ".into())),
Timestamp(Nanosecond, Some("+TZ".into())),
]),
Exact(vec![
Interval(DayTime),
Timestamp(array_type.clone(), None),
Timestamp(Nanosecond, None),
]),
Exact(vec![
Interval(DayTime),
Timestamp(array_type.clone(), Some("+TZ".into())),
Timestamp(Nanosecond, Some("+TZ".into())),
]),
Exact(vec![
Interval(MonthDayNano),
Timestamp(array_type.clone(), None),
]),
Exact(vec![
Interval(MonthDayNano),
Timestamp(array_type.clone(), Some("+TZ".into())),
]),
Exact(vec![
Interval(DayTime),
Timestamp(array_type.clone(), None),
]),
Exact(vec![Interval(DayTime), Timestamp(array_type, None)]),
Exact(vec![
Interval(DayTime),
Timestamp(array_type, Some("+TZ".into())),
]),
]
};

Expand All @@ -1060,10 +1085,13 @@ impl BuiltinScalarFunction {
Exact(vec![Utf8, Date32]),
Exact(vec![Utf8, Date64]),
Exact(vec![Utf8, Timestamp(Second, None)]),
Exact(vec![Utf8, Timestamp(Second, Some("+TZ".into()))]),
Exact(vec![Utf8, Timestamp(Microsecond, None)]),
Exact(vec![Utf8, Timestamp(Microsecond, Some("+TZ".into()))]),
Exact(vec![Utf8, Timestamp(Millisecond, None)]),
Exact(vec![Utf8, Timestamp(Millisecond, Some("+TZ".into()))]),
Exact(vec![Utf8, Timestamp(Nanosecond, None)]),
Exact(vec![Utf8, Timestamp(Nanosecond, Some("+00:00".into()))]),
Exact(vec![Utf8, Timestamp(Nanosecond, Some("+TZ".into()))]),
],
self.volatility(),
),
Expand Down
139 changes: 89 additions & 50 deletions datafusion/expr/src/type_coercion/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use arrow::{
datatypes::{DataType, TimeUnit},
};
use datafusion_common::{plan_err, DataFusionError, Result};
use std::str::FromStr;

/// Performs type coercion for function arguments.
///
Expand Down Expand Up @@ -120,7 +121,7 @@ fn maybe_data_types(
new_type.push(current_type.clone())
} else {
// attempt to coerce
if can_coerce_from(valid_type, current_type) {
if let Some(valid_type) = coerced_from(valid_type, current_type) {
new_type.push(valid_type.clone())
} else {
// not possible
Expand All @@ -136,62 +137,100 @@ fn maybe_data_types(
///
/// See the module level documentation for more detail on coercion.
pub fn can_coerce_from(type_into: &DataType, type_from: &DataType) -> bool {
use self::DataType::*;

if type_into == type_from {
return true;
}
// Null can convert to most of types
if let Some(coerced) = coerced_from(type_into, type_from) {
return coerced == type_into;
}
false
}

fn coerced_from<'a>(
type_into: &'a DataType,
type_from: &'a DataType,
) -> Option<&'a DataType> {
use self::DataType::*;

match type_into {
Int8 => matches!(type_from, Null | Int8),
Int16 => matches!(type_from, Null | Int8 | Int16 | UInt8),
Int32 => matches!(type_from, Null | Int8 | Int16 | Int32 | UInt8 | UInt16),
Int64 => matches!(
type_from,
Null | Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32
),
UInt8 => matches!(type_from, Null | UInt8),
UInt16 => matches!(type_from, Null | UInt8 | UInt16),
UInt32 => matches!(type_from, Null | UInt8 | UInt16 | UInt32),
UInt64 => matches!(type_from, Null | UInt8 | UInt16 | UInt32 | UInt64),
Float32 => matches!(
type_from,
Null | Int8
| Int16
| Int32
| Int64
| UInt8
| UInt16
| UInt32
| UInt64
| Float32
),
Float64 => matches!(
type_from,
Null | Int8
| Int16
| Int32
| Int64
| UInt8
| UInt16
| UInt32
| UInt64
| Float32
| Float64
| Decimal128(_, _)
),
Timestamp(TimeUnit::Nanosecond, _) => {
matches!(
// coerced into type_into
Int8 if matches!(type_from, Null | Int8) => Some(type_into),
Int16 if matches!(type_from, Null | Int8 | Int16 | UInt8) => Some(type_into),
Int32 if matches!(type_from, Null | Int8 | Int16 | Int32 | UInt8 | UInt16) => {
Some(type_into)
}
Int64
if matches!(
type_from,
Null | Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32
) =>
{
Some(type_into)
}
UInt8 if matches!(type_from, Null | UInt8) => Some(type_into),
UInt16 if matches!(type_from, Null | UInt8 | UInt16) => Some(type_into),
UInt32 if matches!(type_from, Null | UInt8 | UInt16 | UInt32) => Some(type_into),
UInt64 if matches!(type_from, Null | UInt8 | UInt16 | UInt32 | UInt64) => {
Some(type_into)
}
Float32
if matches!(
type_from,
Null | Timestamp(_, _) | Date32 | Utf8 | LargeUtf8
)
Null | Int8
| Int16
| Int32
| Int64
| UInt8
| UInt16
| UInt32
| UInt64
| Float32
) =>
{
Some(type_into)
}
Interval(_) => {
matches!(type_from, Utf8 | LargeUtf8)
Float64
if matches!(
type_from,
Null | Int8
| Int16
| Int32
| Int64
| UInt8
| UInt16
| UInt32
| UInt64
| Float32
| Float64
| Decimal128(_, _)
) =>
{
Some(type_into)
}
Timestamp(TimeUnit::Nanosecond, None)
if matches!(
type_from,
Null | Timestamp(_, None) | Date32 | Utf8 | LargeUtf8
) =>
{
Some(type_into)
}
Utf8 | LargeUtf8 => true,
Null => can_cast_types(type_from, type_into),
_ => false,
Interval(_) if matches!(type_from, Utf8 | LargeUtf8) => Some(type_into),
Utf8 | LargeUtf8 => Some(type_into),
Null if can_cast_types(type_from, type_into) => Some(type_into),

// timestamp coercions, with timezone, accept the type_from timezone if valid
wiedld marked this conversation as resolved.
Show resolved Hide resolved
Timestamp(TimeUnit::Nanosecond, Some(_))
if matches!(
type_from,
Timestamp(TimeUnit::Nanosecond, Some(from_tz)) if arrow_array::timezone::Tz::from_str(from_tz).is_ok()
wiedld marked this conversation as resolved.
Show resolved Hide resolved
) =>
{
Some(type_from)
}

// cannot coerce
_ => None,
}
}

Expand Down
Loading
Loading