Skip to content

Commit

Permalink
fix: allow utf8 and largeutf8 to be cast into utf8view
Browse files Browse the repository at this point in the history
  • Loading branch information
tshauck committed Aug 2, 2024
1 parent 65ba700 commit f38b363
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 1 deletion.
1 change: 1 addition & 0 deletions datafusion/expr/src/expr_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ impl ExprSchemable for Expr {
.iter()
.map(|e| e.get_type(schema))
.collect::<Result<Vec<_>>>()?;

// verify that function is invoked with correct number and type of arguments as defined in `TypeSignature`
data_types_with_scalar_udf(&arg_data_types, func).map_err(|err| {
plan_datafusion_err!(
Expand Down
14 changes: 14 additions & 0 deletions datafusion/expr/src/type_coercion/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,8 @@ fn coerced_from<'a>(
(Interval(_), _) if matches!(type_from, Utf8 | LargeUtf8) => {
Some(type_into.clone())
}
// We can go into a Utf8View from a Utf8 or LargeUtf8
(Utf8View, _) if matches!(type_from, Utf8 | LargeUtf8) => Some(type_into.clone()),
// Any type can be coerced into strings
(Utf8 | LargeUtf8, _) => Some(type_into.clone()),
(Null, _) if can_cast_types(type_from, type_into) => Some(type_into.clone()),
Expand Down Expand Up @@ -636,6 +638,18 @@ mod tests {
use super::*;
use arrow::datatypes::Field;

/// Checks that `Utf8` and `LargeUtf8` are both accepted as source types
/// when coercing into `Utf8View`.
#[test]
fn test_string_conversion() {
    // Each entry is (type coerced into, type coerced from, expected result).
    // A fixed-size array is enough here; no heap-allocated `Vec` is needed.
    let cases = [
        (DataType::Utf8View, DataType::Utf8, true),
        (DataType::Utf8View, DataType::LargeUtf8, true),
    ];

    for (type_into, type_from, expected) in &cases {
        // Name the failing pair in the panic message; otherwise a failure
        // only reports `left: false, right: true`.
        assert_eq!(
            can_coerce_from(type_into, type_from),
            *expected,
            "can_coerce_from({type_into:?}, {type_from:?})"
        );
    }
}

#[test]
fn test_maybe_data_types() {
// this vec contains: arg1, arg2, expected result
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/string_view.slt
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ EXPLAIN SELECT
FROM test;
----
logical_plan
-01)Projection: starts_with(CAST(test.column1_utf8view AS Utf8), Utf8("foo")) AS c, starts_with(test.column1_utf8view, test.column2_utf8view) AS c2
+01)Projection: starts_with(test.column1_utf8view, Utf8View("foo")) AS c, starts_with(test.column1_utf8view, test.column2_utf8view) AS c2
02)--TableScan: test projection=[column1_utf8view, column2_utf8view]
>>>>>>> 222263c15 (simplify string view handling)

Expand Down

0 comments on commit f38b363

Please sign in to comment.