Skip to content

Commit

Permalink
simplify string view handling
Browse files Browse the repository at this point in the history
  • Loading branch information
tshauck committed Aug 2, 2024
1 parent 746ef51 commit 222263c
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 48 deletions.
65 changes: 17 additions & 48 deletions datafusion/functions/src/string/starts_with.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,48 +18,21 @@
use std::any::Any;
use std::sync::Arc;

use arrow::array::{ArrayRef, AsArray, OffsetSizeTrait};
use arrow::array::ArrayRef;
use arrow::datatypes::DataType;

use datafusion_common::{cast::as_generic_string_array, internal_err, Result};
use datafusion_common::{internal_err, Result};
use datafusion_expr::ColumnarValue;
use datafusion_expr::TypeSignature::*;
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};

use crate::utils::make_scalar_function;

/// Returns true if string starts with prefix.
/// starts_with('alphabet', 'alph') = t
// NOTE(review): this span is taken from a rendered commit diff that carries no +/- markers.
// The generic `starts_with<T>` directly below looks like the removed implementation and the
// non-generic `starts_with` after it like its replacement; both share the final closing
// brace. Confirm against the repository before treating this text as compilable code.
pub fn starts_with<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
// Dispatch on the data types of both arguments so that each side is accessed through the
// array accessor matching its type before the arrow kernel is called.
let bool_result = match (args[0].data_type(), args[1].data_type()) {
// Both sides are string views.
(DataType::Utf8View, DataType::Utf8View) => {
let left = args[0].as_string_view();
let right = args[1].as_string_view();

arrow::compute::kernels::comparison::starts_with(left, right)?
}
// String view on the left, generic string array (offset type `T`) on the right.
(DataType::Utf8View, DataType::Utf8 | DataType::LargeUtf8) => {
let left = args[0].as_string_view();
let right = as_generic_string_array::<T>(args[1].as_ref())?;

arrow::compute::kernels::comparison::starts_with(left, right)?
}
// Generic string array (offset type `T`) on the left, string view on the right.
(DataType::Utf8 | DataType::LargeUtf8, DataType::Utf8View) => {
let left = as_generic_string_array::<T>(args[0].as_ref())?;
let right = args[1].as_string_view();

arrow::compute::kernels::comparison::starts_with(left, right)?
}
// Both sides are generic string arrays using the same offset type `T`.
(DataType::Utf8 | DataType::LargeUtf8, DataType::Utf8 | DataType::LargeUtf8) => {
let left = as_generic_string_array::<T>(args[0].as_ref())?;
let right = as_generic_string_array::<T>(args[1].as_ref())?;

arrow::compute::kernels::comparison::starts_with(left, right)?
}
// Any other combination of argument types is reported as an internal error.
_ => internal_err!("Unsupported data types for starts_with")?,
};

Ok(Arc::new(bool_result) as ArrayRef)
/// starts_with('alphabet', 'alph') = 't'
pub fn starts_with(args: &[ArrayRef]) -> Result<ArrayRef> {
// Hands both argument arrays straight to the arrow `starts_with` comparison kernel; no
// per-type dispatch happens in this version.
let result = arrow::compute::kernels::comparison::starts_with(&args[0], &args[1])?;
// Wrap the kernel result in an `Arc` so it can be returned as an `ArrayRef`.
Ok(Arc::new(result) as ArrayRef)
}

#[derive(Debug)]
Expand All @@ -75,19 +48,15 @@ impl Default for StartsWithFunc {

impl StartsWithFunc {
/// Creates the function with a `Volatility::Immutable` signature made of `Exact`
/// two-argument type signatures.
// NOTE(review): this span is taken from a rendered commit diff that carries no +/- markers,
// so two alternative ways of building `signature` appear interleaved below (a loop-built
// list and an explicit list). Presumably the loop-built one is the removed code and the
// explicit list is its replacement — confirm against the repository.
pub fn new() -> Self {
use DataType::*;

// Loop-built variant: one `Exact` signature for every ordered (left, right) pair drawn
// from the three string types, i.e. mixed-type pairs are included.
let string_types = vec![Utf8, LargeUtf8, Utf8View];
let mut type_signatures = vec![];

for left in &string_types {
for right in &string_types {
type_signatures.push(Exact(vec![left.clone(), right.clone()]));
}
}

Self {
signature: Signature::one_of(type_signatures, Volatility::Immutable),
// Explicit variant: only the three pairs in which both arguments have the same string
// type are listed.
signature: Signature::one_of(
vec![
Exact(vec![DataType::Utf8View, DataType::Utf8View]),
Exact(vec![DataType::Utf8, DataType::Utf8]),
Exact(vec![DataType::LargeUtf8, DataType::LargeUtf8]),
],
Volatility::Immutable,
),
}
}
}
Expand All @@ -111,9 +80,9 @@ impl ScalarUDFImpl for StartsWithFunc {

/// Evaluates `starts_with` on `args`, selecting the implementation from the data type of
/// the first argument only.
// NOTE(review): this span is taken from a rendered commit diff that carries no +/- markers.
// The three per-type arms calling `starts_with::<i32>` / `starts_with::<i64>` and the single
// combined arm calling the non-generic `starts_with` appear to be the removed and added
// versions of the same dispatch — confirm against the repository.
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
match args[0].data_type() {
// Per-type arms: `Utf8` and `Utf8View` use the `i32` instantiation, `LargeUtf8` uses `i64`.
DataType::Utf8 => make_scalar_function(starts_with::<i32>, vec![])(args),
DataType::LargeUtf8 => make_scalar_function(starts_with::<i64>, vec![])(args),
DataType::Utf8View => make_scalar_function(starts_with::<i32>, vec![])(args),
// Combined arm: all three string types go through the same non-generic function.
DataType::Utf8View | DataType::Utf8 | DataType::LargeUtf8 => {
make_scalar_function(starts_with, vec![])(args)
}
// Any other first-argument type is reported as an internal error.
_ => internal_err!("Unsupported data types for starts_with")?,
}
}
Expand Down
9 changes: 9 additions & 0 deletions datafusion/sqllogictest/test_files/string_view.slt
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,15 @@ logical_plan
02)--Filter: CAST(test.column2_utf8 AS Utf8View) = test.column1_utf8view
03)----TableScan: test projection=[column1_utf8, column2_utf8, column1_utf8view]

query TT
EXPLAIN SELECT
STARTS_WITH(column1_utf8view, 'foo') as c,
STARTS_WITH(column1_utf8view, column2_utf8view) as c2
FROM test;
----
logical_plan
01)Projection: starts_with(CAST(test.column1_utf8view AS Utf8), Utf8("foo")) AS c, starts_with(test.column1_utf8view, test.column2_utf8view) AS c2
02)--TableScan: test projection=[column1_utf8view, column2_utf8view]

statement ok
drop table test;

0 comments on commit 222263c

Please sign in to comment.