Skip to content

Commit

Permalink
improve null handling for to_char (#9689)
Browse files Browse the repository at this point in the history
* improve null handling for to_char

* early return from to_char for null format

* remove invalid comment, update example

* rename column for consistency across platforms for tests

* return None instead of empty string from to_char

* use arrow:new_null_array for fast init
  • Loading branch information
tinfoil-knight authored Mar 24, 2024
1 parent 67e0bd3 commit 227d1f8
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 10 deletions.
19 changes: 19 additions & 0 deletions datafusion-examples/examples/to_char.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,5 +193,24 @@ async fn main() -> Result<()> {
&result
);

// output format is null

let result = ctx
.sql("SELECT to_char(arrow_cast(123456, 'Duration(Second)'), null) as result")
.await?
.collect()
.await?;

assert_batches_eq!(
&[
"+--------+",
"| result |",
"+--------+",
"| |",
"+--------+",
],
&result
);

Ok(())
}
32 changes: 25 additions & 7 deletions datafusion/functions/src/datetime/to_char.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use std::any::Any;
use std::sync::Arc;

use arrow::array::cast::AsArray;
use arrow::array::{Array, ArrayRef, StringArray};
use arrow::array::{new_null_array, Array, ArrayRef, StringArray};
use arrow::datatypes::DataType;
use arrow::datatypes::DataType::{
Date32, Date64, Duration, Time32, Time64, Timestamp, Utf8,
Expand Down Expand Up @@ -109,7 +109,6 @@ impl ScalarUDFImpl for ToCharFunc {
}

match &args[1] {
// null format, use default formats
ColumnarValue::Scalar(ScalarValue::Utf8(None))
| ColumnarValue::Scalar(ScalarValue::Null) => {
_to_char_scalar(args[0].clone(), None)
Expand Down Expand Up @@ -175,6 +174,18 @@ fn _to_char_scalar(
let data_type = &expression.data_type();
let is_scalar_expression = matches!(&expression, ColumnarValue::Scalar(_));
let array = expression.into_array(1)?;

if format.is_none() {
if is_scalar_expression {
return Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)));
} else {
return Ok(ColumnarValue::Array(new_null_array(
&DataType::Utf8,
array.len(),
)));
}
}

let format_options = match _build_format_options(data_type, format) {
Ok(value) => value,
Err(value) => return value,
Expand Down Expand Up @@ -202,7 +213,7 @@ fn _to_char_scalar(

fn _to_char_array(args: &[ColumnarValue]) -> Result<ColumnarValue> {
let arrays = ColumnarValue::values_to_arrays(args)?;
let mut results: Vec<String> = vec![];
let mut results: Vec<Option<String>> = vec![];
let format_array = arrays[1].as_string::<i32>();
let data_type = arrays[0].data_type();

Expand All @@ -212,6 +223,10 @@ fn _to_char_array(args: &[ColumnarValue]) -> Result<ColumnarValue> {
} else {
Some(format_array.value(idx))
};
if format.is_none() {
results.push(None);
continue;
}
let format_options = match _build_format_options(data_type, format) {
Ok(value) => value,
Err(value) => return value,
Expand All @@ -221,7 +236,7 @@ fn _to_char_array(args: &[ColumnarValue]) -> Result<ColumnarValue> {
let formatter = ArrayFormatter::try_new(arrays[0].as_ref(), &format_options)?;
let result = formatter.value(idx).try_to_string();
match result {
Ok(value) => results.push(value),
Ok(value) => results.push(Some(value)),
Err(e) => return exec_err!("{}", e),
}
}
Expand All @@ -230,9 +245,12 @@ fn _to_char_array(args: &[ColumnarValue]) -> Result<ColumnarValue> {
ColumnarValue::Array(_) => Ok(ColumnarValue::Array(Arc::new(StringArray::from(
results,
)) as ArrayRef)),
ColumnarValue::Scalar(_) => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some(
results.first().unwrap().to_string(),
)))),
ColumnarValue::Scalar(_) => match results.first().unwrap() {
Some(value) => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some(
value.to_string(),
)))),
None => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None))),
},
}
}

Expand Down
14 changes: 11 additions & 3 deletions datafusion/sqllogictest/test_files/timestamps.slt
Original file line number Diff line number Diff line change
Expand Up @@ -2661,7 +2661,7 @@ PT123456S
query T
select to_char(arrow_cast(123456, 'Duration(Second)'), null);
----
PT123456S
NULL

query error DataFusion error: Execution error: Cast error: Format error
SELECT to_char(timestamps, '%X%K') from formats;
Expand All @@ -2672,14 +2672,22 @@ SELECT to_char('2000-02-03'::date, '%X%K');
query T
SELECT to_char(timestamps, null) from formats;
----
2024-01-01T06:00:00Z
2025-01-01T23:59:58Z
NULL
NULL

query T
SELECT to_char(null, '%d-%m-%Y');
----
(empty)

query T
SELECT to_char(column1, column2)
FROM
(VALUES ('2024-01-01 06:00:00'::timestamp, null), ('2025-01-01 23:59:58'::timestamp, '%d:%m:%Y %H-%M-%S'));
----
NULL
01:01:2025 23-59-58

statement ok
drop table formats;

Expand Down

0 comments on commit 227d1f8

Please sign in to comment.