Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improve null handling for to_char #9689

Merged
merged 7 commits into from
Mar 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions datafusion-examples/examples/to_char.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,5 +193,24 @@ async fn main() -> Result<()> {
&result
);

// output format is null

let result = ctx
.sql("SELECT to_char(arrow_cast(123456, 'Duration(Second)'), null) as result")
.await?
.collect()
.await?;

assert_batches_eq!(
&[
"+--------+",
"| result |",
"+--------+",
"| |",
"+--------+",
],
&result
);

Ok(())
}
32 changes: 25 additions & 7 deletions datafusion/functions/src/datetime/to_char.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use std::any::Any;
use std::sync::Arc;

use arrow::array::cast::AsArray;
use arrow::array::{Array, ArrayRef, StringArray};
use arrow::array::{new_null_array, Array, ArrayRef, StringArray};
use arrow::datatypes::DataType;
use arrow::datatypes::DataType::{
Date32, Date64, Duration, Time32, Time64, Timestamp, Utf8,
Expand Down Expand Up @@ -109,7 +109,6 @@ impl ScalarUDFImpl for ToCharFunc {
}

match &args[1] {
// null format, use default formats
ColumnarValue::Scalar(ScalarValue::Utf8(None))
| ColumnarValue::Scalar(ScalarValue::Null) => {
_to_char_scalar(args[0].clone(), None)
Expand Down Expand Up @@ -175,6 +174,18 @@ fn _to_char_scalar(
let data_type = &expression.data_type();
let is_scalar_expression = matches!(&expression, ColumnarValue::Scalar(_));
let array = expression.into_array(1)?;

if format.is_none() {
if is_scalar_expression {
return Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)));
} else {
return Ok(ColumnarValue::Array(new_null_array(
&DataType::Utf8,
array.len(),
)));
}
}

let format_options = match _build_format_options(data_type, format) {
Ok(value) => value,
Err(value) => return value,
Expand Down Expand Up @@ -202,7 +213,7 @@ fn _to_char_scalar(

fn _to_char_array(args: &[ColumnarValue]) -> Result<ColumnarValue> {
let arrays = ColumnarValue::values_to_arrays(args)?;
let mut results: Vec<String> = vec![];
let mut results: Vec<Option<String>> = vec![];
let format_array = arrays[1].as_string::<i32>();
let data_type = arrays[0].data_type();

Expand All @@ -212,6 +223,10 @@ fn _to_char_array(args: &[ColumnarValue]) -> Result<ColumnarValue> {
} else {
Some(format_array.value(idx))
};
if format.is_none() {
results.push(None);
continue;
}
let format_options = match _build_format_options(data_type, format) {
Ok(value) => value,
Err(value) => return value,
Expand All @@ -221,7 +236,7 @@ fn _to_char_array(args: &[ColumnarValue]) -> Result<ColumnarValue> {
let formatter = ArrayFormatter::try_new(arrays[0].as_ref(), &format_options)?;
let result = formatter.value(idx).try_to_string();
match result {
Ok(value) => results.push(value),
Ok(value) => results.push(Some(value)),
Err(e) => return exec_err!("{}", e),
}
}
Expand All @@ -230,9 +245,12 @@ fn _to_char_array(args: &[ColumnarValue]) -> Result<ColumnarValue> {
ColumnarValue::Array(_) => Ok(ColumnarValue::Array(Arc::new(StringArray::from(
results,
)) as ArrayRef)),
ColumnarValue::Scalar(_) => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some(
results.first().unwrap().to_string(),
)))),
ColumnarValue::Scalar(_) => match results.first().unwrap() {
Some(value) => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some(
value.to_string(),
)))),
None => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None))),
},
}
}

Expand Down
14 changes: 11 additions & 3 deletions datafusion/sqllogictest/test_files/timestamps.slt
Original file line number Diff line number Diff line change
Expand Up @@ -2661,7 +2661,7 @@ PT123456S
query T
select to_char(arrow_cast(123456, 'Duration(Second)'), null);
----
PT123456S
NULL

query error DataFusion error: Execution error: Cast error: Format error
SELECT to_char(timestamps, '%X%K') from formats;
Expand All @@ -2672,14 +2672,22 @@ SELECT to_char('2000-02-03'::date, '%X%K');
query T
SELECT to_char(timestamps, null) from formats;
----
2024-01-01T06:00:00Z
2025-01-01T23:59:58Z
NULL
NULL

query T
SELECT to_char(null, '%d-%m-%Y');
----
(empty)

query T
SELECT to_char(column1, column2)
FROM
(VALUES ('2024-01-01 06:00:00'::timestamp, null), ('2025-01-01 23:59:58'::timestamp, '%d:%m:%Y %H-%M-%S'));
----
NULL
01:01:2025 23-59-58

statement ok
drop table formats;

Expand Down
Loading