From 75202b587108cd8de6c702a84463ac0ca0ca4d4b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 20 Dec 2024 09:37:35 -0500 Subject: [PATCH] Upgrade to sqlparser `0.53.0` (#13767) * chore: Udpate to sqlparser 0.53.0 * Update for new sqlparser API * more api updates * Avoid serializing query to SQL string unless it is necessary * Box wildcard options * chore: update datafusion-cli Cargo.lock --- Cargo.toml | 2 +- datafusion-cli/Cargo.lock | 8 +- datafusion/common/src/utils/mod.rs | 5 +- .../user_defined_scalar_functions.rs | 10 +- datafusion/expr/src/expr.rs | 2 +- datafusion/expr/src/expr_fn.rs | 8 +- .../src/analyzer/inline_table_scan.rs | 8 +- .../proto/src/logical_plan/from_proto.rs | 2 +- .../tests/cases/roundtrip_logical_plan.rs | 12 +- datafusion/sql/src/expr/function.rs | 24 +- datafusion/sql/src/expr/mod.rs | 8 +- datafusion/sql/src/parser.rs | 14 +- datafusion/sql/src/planner.rs | 14 +- datafusion/sql/src/select.rs | 1 + datafusion/sql/src/statement.rs | 207 ++++++++++++++---- datafusion/sql/src/unparser/ast.rs | 3 + datafusion/sql/src/unparser/dialect.rs | 6 +- datafusion/sql/src/unparser/expr.rs | 32 ++- datafusion/sql/src/unparser/plan.rs | 9 +- datafusion/sql/src/unparser/utils.rs | 12 +- .../test_files/information_schema.slt | 4 +- 21 files changed, 271 insertions(+), 120 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index cc94b4292a50..b7c8c09a8537 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -147,7 +147,7 @@ recursive = "0.1.1" regex = "1.8" rstest = "0.23.0" serde_json = "1" -sqlparser = { version = "0.52.0", features = ["visitor"] } +sqlparser = { version = "0.53.0", features = ["visitor"] } tempfile = "3" tokio = { version = "1.36", features = ["macros", "rt", "sync"] } url = "2.2" diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 2ffc64114ef7..a435869dbece 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -3755,9 +3755,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.52.0" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a875d8cd437cc8a97e9aeaeea352ec9a19aea99c23e9effb17757291de80b08" +checksum = "05a528114c392209b3264855ad491fcce534b94a38771b0a0b97a79379275ce8" dependencies = [ "log", "sqlparser_derive", @@ -3765,9 +3765,9 @@ dependencies = [ [[package]] name = "sqlparser_derive" -version = "0.2.2" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" +checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index 5e840f859400..29d33fec14ab 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -887,10 +887,10 @@ pub fn get_available_parallelism() -> usize { #[cfg(test)] mod tests { + use super::*; use crate::ScalarValue::Null; use arrow::array::Float64Array; - - use super::*; + use sqlparser::tokenizer::Span; #[test] fn test_bisect_linear_left_and_right() -> Result<()> { @@ -1118,6 +1118,7 @@ mod tests { let expected_parsed = vec![Ident { value: identifier.to_string(), quote_style, + span: Span::empty(), }]; assert_eq!( diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs index 
4d2a536c4920..30b3c6e2bbeb 100644 --- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs @@ -27,10 +27,6 @@ use arrow_array::{ Array, ArrayRef, Float32Array, Float64Array, Int32Array, RecordBatch, StringArray, }; use arrow_schema::{DataType, Field, Schema}; -use parking_lot::Mutex; -use regex::Regex; -use sqlparser::ast::Ident; - use datafusion::execution::context::{FunctionFactory, RegisterFunction, SessionState}; use datafusion::prelude::*; use datafusion::{execution::registry::FunctionRegistry, test_util}; @@ -48,6 +44,10 @@ use datafusion_expr::{ Volatility, }; use datafusion_functions_nested::range::range_udf; +use parking_lot::Mutex; +use regex::Regex; +use sqlparser::ast::Ident; +use sqlparser::tokenizer::Span; /// test that casting happens on udfs. /// c11 is f32, but `custom_sqrt` requires f64. Casting happens but the logical plan and @@ -1187,6 +1187,7 @@ async fn create_scalar_function_from_sql_statement_postgres_syntax() -> Result<( name: Some(Ident { value: "name".into(), quote_style: None, + span: Span::empty(), }), data_type: DataType::Utf8, default_expr: None, @@ -1196,6 +1197,7 @@ async fn create_scalar_function_from_sql_statement_postgres_syntax() -> Result<( language: Some(Ident { value: "plrust".into(), quote_style: None, + span: Span::empty(), }), behavior: None, function_body: Some(lit(body)), diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index af54dad79d2e..c82572ebd5f1 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -313,7 +313,7 @@ pub enum Expr { /// plan into physical plan. Wildcard { qualifier: Option, - options: WildcardOptions, + options: Box, }, /// List of grouping set expressions. 
Only valid in the context of an aggregate /// GROUP BY expression list diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index a44dd24039dc..a2de5e7b259f 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -123,7 +123,7 @@ pub fn placeholder(id: impl Into) -> Expr { pub fn wildcard() -> Expr { Expr::Wildcard { qualifier: None, - options: WildcardOptions::default(), + options: Box::new(WildcardOptions::default()), } } @@ -131,7 +131,7 @@ pub fn wildcard() -> Expr { pub fn wildcard_with_options(options: WildcardOptions) -> Expr { Expr::Wildcard { qualifier: None, - options, + options: Box::new(options), } } @@ -148,7 +148,7 @@ pub fn wildcard_with_options(options: WildcardOptions) -> Expr { pub fn qualified_wildcard(qualifier: impl Into) -> Expr { Expr::Wildcard { qualifier: Some(qualifier.into()), - options: WildcardOptions::default(), + options: Box::new(WildcardOptions::default()), } } @@ -159,7 +159,7 @@ pub fn qualified_wildcard_with_options( ) -> Expr { Expr::Wildcard { qualifier: Some(qualifier.into()), - options, + options: Box::new(options), } } diff --git a/datafusion/optimizer/src/analyzer/inline_table_scan.rs b/datafusion/optimizer/src/analyzer/inline_table_scan.rs index 342d85a915b4..95781b395f3c 100644 --- a/datafusion/optimizer/src/analyzer/inline_table_scan.rs +++ b/datafusion/optimizer/src/analyzer/inline_table_scan.rs @@ -23,8 +23,7 @@ use crate::analyzer::AnalyzerRule; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::{Column, Result}; -use datafusion_expr::expr::WildcardOptions; -use datafusion_expr::{logical_plan::LogicalPlan, Expr, LogicalPlanBuilder}; +use datafusion_expr::{logical_plan::LogicalPlan, wildcard, Expr, LogicalPlanBuilder}; /// Analyzed rule that inlines TableScan that provide a [`LogicalPlan`] /// (DataFrame / ViewTable) @@ -93,10 +92,7 @@ fn generate_projection_expr( ))); } } else { - exprs.push(Expr::Wildcard { - qualifier: None, - options: WildcardOptions::default(), - }); + exprs.push(wildcard()); } Ok(exprs) } diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 301efc42a7c4..6ab3e0c9096c 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -513,7 +513,7 @@ pub fn parse_expr( let qualifier = qualifier.to_owned().map(|x| x.try_into()).transpose()?; Ok(Expr::Wildcard { qualifier, - options: WildcardOptions::default(), + options: Box::new(WildcardOptions::default()), }) } ExprType::ScalarUdfExpr(protobuf::ScalarUdfExprNode { diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index f793e96f612b..c0885ece08bc 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -67,7 +67,7 @@ use datafusion_common::{ use datafusion_expr::dml::CopyTo; use datafusion_expr::expr::{ self, Between, BinaryExpr, Case, Cast, GroupingSet, InList, Like, ScalarFunction, - Unnest, WildcardOptions, + Unnest, }; use datafusion_expr::logical_plan::{Extension, UserDefinedLogicalNodeCore}; use datafusion_expr::{ @@ -2061,10 +2061,7 @@ fn roundtrip_unnest() { #[test] fn roundtrip_wildcard() { - let test_expr = Expr::Wildcard { - qualifier: None, - options: WildcardOptions::default(), - }; + let test_expr = wildcard(); let ctx = SessionContext::new(); 
roundtrip_expr_test(test_expr, ctx); @@ -2072,10 +2069,7 @@ fn roundtrip_wildcard() { #[test] fn roundtrip_qualified_wildcard() { - let test_expr = Expr::Wildcard { - qualifier: Some("foo".into()), - options: WildcardOptions::default(), - }; + let test_expr = qualified_wildcard("foo"); let ctx = SessionContext::new(); roundtrip_expr_test(test_expr, ctx); diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index 67fa23b86990..da1a4ba81f5a 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -22,11 +22,11 @@ use datafusion_common::{ internal_datafusion_err, internal_err, not_impl_err, plan_datafusion_err, plan_err, DFSchema, Dependency, Result, }; -use datafusion_expr::expr::WildcardOptions; use datafusion_expr::expr::{ScalarFunction, Unnest}; use datafusion_expr::planner::PlannerResult; use datafusion_expr::{ - expr, Expr, ExprFunctionExt, ExprSchemable, WindowFrame, WindowFunctionDefinition, + expr, qualified_wildcard, wildcard, Expr, ExprFunctionExt, ExprSchemable, + WindowFrame, WindowFunctionDefinition, }; use sqlparser::ast::{ DuplicateTreatment, Expr as SQLExpr, Function as SQLFunction, FunctionArg, @@ -169,6 +169,11 @@ impl FunctionArgs { "Calling {name}: SEPARATOR not supported in function arguments: {sep}" ) } + FunctionArgumentClause::JsonNullClause(jn) => { + return not_impl_err!( + "Calling {name}: JSON NULL clause not supported in function arguments: {jn}" + ) + } } } @@ -413,17 +418,11 @@ impl SqlToRel<'_, S> { name: _, arg: FunctionArgExpr::Wildcard, operator: _, - } => Ok(Expr::Wildcard { - qualifier: None, - options: WildcardOptions::default(), - }), + } => Ok(wildcard()), FunctionArg::Unnamed(FunctionArgExpr::Expr(arg)) => { self.sql_expr_to_logical_expr(arg, schema, planner_context) } - FunctionArg::Unnamed(FunctionArgExpr::Wildcard) => Ok(Expr::Wildcard { - qualifier: None, - options: WildcardOptions::default(), - }), + FunctionArg::Unnamed(FunctionArgExpr::Wildcard) => Ok(wildcard()), FunctionArg::Unnamed(FunctionArgExpr::QualifiedWildcard(object_name)) => { let qualifier = self.object_name_to_table_reference(object_name)?; // Sanity check on qualifier with schema @@ -431,10 +430,7 @@ impl SqlToRel<'_, S> { if qualified_indices.is_empty() { return plan_err!("Invalid qualifier {qualifier}"); } - Ok(Expr::Wildcard { - qualifier: Some(qualifier), - options: WildcardOptions::default(), - }) + Ok(qualified_wildcard(qualifier)) } _ => not_impl_err!("Unsupported qualified wildcard argument: {sql:?}"), } diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index e8ec8d7b7d1c..a651d8fa5d35 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -593,13 +593,13 @@ impl SqlToRel<'_, S> { } not_impl_err!("AnyOp not supported by ExprPlanner: {binary_expr:?}") } - SQLExpr::Wildcard => Ok(Expr::Wildcard { + SQLExpr::Wildcard(_token) => Ok(Expr::Wildcard { qualifier: None, - options: WildcardOptions::default(), + options: Box::new(WildcardOptions::default()), }), - SQLExpr::QualifiedWildcard(object_name) => Ok(Expr::Wildcard { + SQLExpr::QualifiedWildcard(object_name, _token) => Ok(Expr::Wildcard { qualifier: Some(self.object_name_to_table_reference(object_name)?), - options: WildcardOptions::default(), + options: Box::new(WildcardOptions::default()), }), SQLExpr::Tuple(values) => self.parse_tuple(schema, planner_context, values), _ => not_impl_err!("Unsupported ast node in sqltorel: {sql:?}"), diff --git a/datafusion/sql/src/parser.rs 
b/datafusion/sql/src/parser.rs index efec6020641c..f185d65fa194 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -21,6 +21,7 @@ use std::collections::VecDeque; use std::fmt; use sqlparser::ast::ExprWithAlias; +use sqlparser::tokenizer::TokenWithSpan; use sqlparser::{ ast::{ ColumnDef, ColumnOptionDef, ObjectName, OrderByExpr, Query, @@ -28,7 +29,7 @@ use sqlparser::{ }, dialect::{keywords::Keyword, Dialect, GenericDialect}, parser::{Parser, ParserError}, - tokenizer::{Token, TokenWithLocation, Tokenizer, Word}, + tokenizer::{Token, Tokenizer, Word}, }; // Use `Parser::expected` instead, if possible @@ -338,7 +339,7 @@ impl<'a> DFParser<'a> { fn expected( &self, expected: &str, - found: TokenWithLocation, + found: TokenWithSpan, ) -> Result { parser_err!(format!("Expected {expected}, found: {found}")) } @@ -876,6 +877,7 @@ mod tests { use super::*; use sqlparser::ast::Expr::Identifier; use sqlparser::ast::{BinaryOperator, DataType, Expr, Ident}; + use sqlparser::tokenizer::Span; fn expect_parse_ok(sql: &str, expected: Statement) -> Result<(), ParserError> { let statements = DFParser::parse_sql(sql)?; @@ -911,6 +913,7 @@ mod tests { name: Ident { value: name.into(), quote_style: None, + span: Span::empty(), }, data_type, collation: None, @@ -1219,6 +1222,7 @@ mod tests { expr: Identifier(Ident { value: "c1".to_owned(), quote_style: None, + span: Span::empty(), }), asc, nulls_first, @@ -1250,6 +1254,7 @@ mod tests { expr: Identifier(Ident { value: "c1".to_owned(), quote_style: None, + span: Span::empty(), }), asc: Some(true), nulls_first: None, @@ -1259,6 +1264,7 @@ mod tests { expr: Identifier(Ident { value: "c2".to_owned(), quote_style: None, + span: Span::empty(), }), asc: Some(false), nulls_first: Some(true), @@ -1290,11 +1296,13 @@ mod tests { left: Box::new(Identifier(Ident { value: "c1".to_owned(), quote_style: None, + span: Span::empty(), })), op: BinaryOperator::Minus, right: Box::new(Identifier(Ident { value: "c2".to_owned(), quote_style: None, + span: Span::empty(), })), }, asc: Some(true), @@ -1335,11 +1343,13 @@ mod tests { left: Box::new(Identifier(Ident { value: "c1".to_owned(), quote_style: None, + span: Span::empty(), })), op: BinaryOperator::Minus, right: Box::new(Identifier(Ident { value: "c2".to_owned(), quote_style: None, + span: Span::empty(), })), }, asc: Some(true), diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 2d0ba8f8d994..d917a707ca20 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -310,7 +310,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { plan: LogicalPlan, alias: TableAlias, ) -> Result { - let plan = self.apply_expr_alias(plan, alias.columns)?; + let idents = alias.columns.into_iter().map(|c| c.name).collect(); + let plan = self.apply_expr_alias(plan, idents)?; LogicalPlanBuilder::from(plan) .alias(TableReference::bare( @@ -513,7 +514,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | SQLDataType::Regclass | SQLDataType::Custom(_, _) | SQLDataType::Array(_) - | SQLDataType::Enum(_) + | SQLDataType::Enum(_, _) | SQLDataType::Set(_) | SQLDataType::MediumInt(_) | SQLDataType::UnsignedMediumInt(_) @@ -557,6 +558,15 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | SQLDataType::Nullable(_) | SQLDataType::LowCardinality(_) | SQLDataType::Trigger + // MySQL datatypes + | SQLDataType::TinyBlob + | SQLDataType::MediumBlob + | SQLDataType::LongBlob + | SQLDataType::TinyText + | SQLDataType::MediumText + | SQLDataType::LongText + | SQLDataType::Bit(_) + 
|SQLDataType::BitVarying(_) => not_impl_err!( "Unsupported SQL type {sql_type:?}" ), diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index c2a9bac24e66..9a84c00a8044 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -655,6 +655,7 @@ impl SqlToRel<'_, S> { opt_rename, opt_replace: _opt_replace, opt_ilike: _opt_ilike, + wildcard_token: _wildcard_token, } = options; if opt_rename.is_some() { diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index f750afbc4a53..e264b9083cc0 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -33,7 +33,7 @@ use arrow_schema::{DataType, Fields}; use datafusion_common::error::_plan_err; use datafusion_common::parsers::CompressionTypeVariant; use datafusion_common::{ - exec_err, not_impl_err, plan_datafusion_err, plan_err, schema_err, + exec_err, internal_err, not_impl_err, plan_datafusion_err, plan_err, schema_err, unqualified_field_not_found, Column, Constraint, Constraints, DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, SchemaError, SchemaReference, TableReference, ToDFSchema, @@ -54,13 +54,16 @@ use datafusion_expr::{ TransactionConclusion, TransactionEnd, TransactionIsolationLevel, TransactionStart, Volatility, WriteOp, }; -use sqlparser::ast::{self, SqliteOnConflict}; +use sqlparser::ast::{ + self, BeginTransactionKind, NullsDistinctOption, ShowStatementIn, + ShowStatementOptions, SqliteOnConflict, +}; use sqlparser::ast::{ Assignment, AssignmentTarget, ColumnDef, CreateIndex, CreateTable, CreateTableOptions, Delete, DescribeAlias, Expr as SQLExpr, FromTable, Ident, Insert, ObjectName, ObjectType, OneOrManyWithParens, Query, SchemaName, SetExpr, - ShowCreateObject, ShowStatementFilter, Statement, TableConstraint, TableFactor, - TableWithJoins, TransactionMode, UnaryOperator, Value, + ShowCreateObject, Statement, TableConstraint, TableFactor, TableWithJoins, + TransactionMode, UnaryOperator, Value, }; use sqlparser::parser::ParserError::ParserError; @@ -107,6 +110,7 @@ fn calc_inline_constraints_from_columns(columns: &[ColumnDef]) -> Vec SqlToRel<'_, S> { statement: Statement, planner_context: &mut PlannerContext, ) -> Result { - let sql = Some(statement.to_string()); match statement { Statement::ExplainTable { describe_alias: DescribeAlias::Describe, // only parse 'DESCRIBE table_name' and not 'EXPLAIN table_name' @@ -514,6 +517,35 @@ impl SqlToRel<'_, S> { return not_impl_err!("To not supported")?; } + // put the statement back together temporarily to get the SQL + // string representation + let stmt = Statement::CreateView { + or_replace, + materialized, + name, + columns, + query, + options: CreateTableOptions::None, + cluster_by, + comment, + with_no_schema_binding, + if_not_exists, + temporary, + to, + }; + let sql = stmt.to_string(); + let Statement::CreateView { + name, + columns, + query, + or_replace, + temporary, + .. 
+ } = stmt + else { + return internal_err!("Unreachable code in create view"); + }; + let columns = columns .into_iter() .map(|view_column_def| { @@ -534,7 +566,7 @@ impl SqlToRel<'_, S> { name: self.object_name_to_table_reference(name)?, input: Arc::new(plan), or_replace, - definition: sql, + definition: Some(sql), temporary, }))) } @@ -685,19 +717,99 @@ impl SqlToRel<'_, S> { Statement::ShowTables { extended, full, - db_name, - filter, - // SHOW TABLES IN/FROM are equivalent, this field specifies which the user - // specified, but it doesn't affect the plan so ignore the field - clause: _, - } => self.show_tables_to_plan(extended, full, db_name, filter), + terse, + history, + external, + show_options, + } => { + // We only support the basic "SHOW TABLES" + // https://github.com/apache/datafusion/issues/3188 + if extended { + return not_impl_err!("SHOW TABLES EXTENDED not supported")?; + } + if full { + return not_impl_err!("SHOW FULL TABLES not supported")?; + } + if terse { + return not_impl_err!("SHOW TERSE TABLES not supported")?; + } + if history { + return not_impl_err!("SHOW TABLES HISTORY not supported")?; + } + if external { + return not_impl_err!("SHOW EXTERNAL TABLES not supported")?; + } + let ShowStatementOptions { + show_in, + starts_with, + limit, + limit_from, + filter_position, + } = show_options; + if show_in.is_some() { + return not_impl_err!("SHOW TABLES IN not supported")?; + } + if starts_with.is_some() { + return not_impl_err!("SHOW TABLES LIKE not supported")?; + } + if limit.is_some() { + return not_impl_err!("SHOW TABLES LIMIT not supported")?; + } + if limit_from.is_some() { + return not_impl_err!("SHOW TABLES LIMIT FROM not supported")?; + } + if filter_position.is_some() { + return not_impl_err!("SHOW TABLES FILTER not supported")?; + } + self.show_tables_to_plan() + } Statement::ShowColumns { extended, full, - table_name, - filter, - } => self.show_columns_to_plan(extended, full, table_name, filter), + show_options, + } => { + let ShowStatementOptions { + show_in, + starts_with, + limit, + limit_from, + filter_position, + } = show_options; + if starts_with.is_some() { + return not_impl_err!("SHOW COLUMNS LIKE not supported")?; + } + if limit.is_some() { + return not_impl_err!("SHOW COLUMNS LIMIT not supported")?; + } + if limit_from.is_some() { + return not_impl_err!("SHOW COLUMNS LIMIT FROM not supported")?; + } + if filter_position.is_some() { + return not_impl_err!( + "SHOW COLUMNS with WHERE or LIKE is not supported" + )?; + } + let Some(ShowStatementIn { + // specifies if the syntax was `SHOW COLUMNS IN` or `SHOW + // COLUMNS FROM` which is not different in DataFusion + clause: _, + parent_type, + parent_name, + }) = show_in + else { + return plan_err!("SHOW COLUMNS requires a table name"); + }; + + if let Some(parent_type) = parent_type { + return not_impl_err!("SHOW COLUMNS IN {parent_type} not supported"); + } + let Some(table_name) = parent_name else { + return plan_err!("SHOW COLUMNS requires a table name"); + }; + + self.show_columns_to_plan(extended, full, table_name) + } Statement::Insert(Insert { or, @@ -766,10 +878,14 @@ impl SqlToRel<'_, S> { from, selection, returning, + or, } => { if returning.is_some() { plan_err!("Update-returning clause not yet supported")?; } + if or.is_some() { + plan_err!("ON conflict not supported")?; + } self.update_to_plan(table, assignments, from, selection) } @@ -810,12 +926,14 @@ impl SqlToRel<'_, S> { modes, begin: false, modifier, + transaction, } => { if let Some(modifier) = modifier { return 
not_impl_err!( "Transaction modifier not supported: {modifier}" ); } + self.validate_transaction_kind(transaction)?; let isolation_level: ast::TransactionIsolationLevel = modes .iter() .filter_map(|m: &TransactionMode| match m { @@ -879,7 +997,7 @@ impl SqlToRel<'_, S> { }); Ok(LogicalPlan::Statement(statement)) } - Statement::CreateFunction { + Statement::CreateFunction(ast::CreateFunction { or_replace, temporary, name, @@ -889,7 +1007,7 @@ impl SqlToRel<'_, S> { behavior, language, .. - } => { + }) => { let return_type = match return_type { Some(t) => Some(self.convert_data_type(&t)?), None => None, @@ -1033,8 +1151,8 @@ impl SqlToRel<'_, S> { }, ))) } - _ => { - not_impl_err!("Unsupported SQL statement: {sql:?}") + stmt => { + not_impl_err!("Unsupported SQL statement: {stmt}") } } } @@ -1065,24 +1183,12 @@ impl SqlToRel<'_, S> { } /// Generate a logical plan from a "SHOW TABLES" query - fn show_tables_to_plan( - &self, - extended: bool, - full: bool, - db_name: Option, - filter: Option, - ) -> Result { + fn show_tables_to_plan(&self) -> Result { if self.has_table("information_schema", "tables") { - // We only support the basic "SHOW TABLES" - // https://github.com/apache/datafusion/issues/3188 - if db_name.is_some() || filter.is_some() || full || extended { - plan_err!("Unsupported parameters to SHOW TABLES") - } else { - let query = "SELECT * FROM information_schema.tables;"; - let mut rewrite = DFParser::parse_sql(query)?; - assert_eq!(rewrite.len(), 1); - self.statement_to_plan(rewrite.pop_front().unwrap()) // length of rewrite is 1 - } + let query = "SELECT * FROM information_schema.tables;"; + let mut rewrite = DFParser::parse_sql(query)?; + assert_eq!(rewrite.len(), 1); + self.statement_to_plan(rewrite.pop_front().unwrap()) // length of rewrite is 1 } else { plan_err!("SHOW TABLES is not supported unless information_schema is enabled") } @@ -1841,22 +1947,18 @@ impl SqlToRel<'_, S> { extended: bool, full: bool, sql_table_name: ObjectName, - filter: Option, ) -> Result { - if filter.is_some() { - return plan_err!("SHOW COLUMNS with WHERE or LIKE is not supported"); - } + // Figure out the where clause + let where_clause = object_name_to_qualifier( + &sql_table_name, + self.options.enable_ident_normalization, + ); if !self.has_table("information_schema", "columns") { return plan_err!( "SHOW COLUMNS is not supported unless information_schema is enabled" ); } - // Figure out the where clause - let where_clause = object_name_to_qualifier( - &sql_table_name, - self.options.enable_ident_normalization, - ); // Do a table lookup to verify the table exists let table_ref = self.object_name_to_table_reference(sql_table_name)?; @@ -1916,4 +2018,19 @@ impl SqlToRel<'_, S> { .get_table_source(tables_reference) .is_ok() } + + fn validate_transaction_kind( + &self, + kind: Option, + ) -> Result<()> { + match kind { + // BEGIN + None => Ok(()), + // BEGIN TRANSACTION + Some(BeginTransactionKind::Transaction) => Ok(()), + Some(BeginTransactionKind::Work) => { + not_impl_err!("Transaction kind not supported: {kind:?}") + } + } + } } diff --git a/datafusion/sql/src/unparser/ast.rs b/datafusion/sql/src/unparser/ast.rs index ad0b5f16b283..345d16adef29 100644 --- a/datafusion/sql/src/unparser/ast.rs +++ b/datafusion/sql/src/unparser/ast.rs @@ -24,6 +24,7 @@ use core::fmt; use sqlparser::ast; +use sqlparser::ast::helpers::attached_token::AttachedToken; #[derive(Clone)] pub(super) struct QueryBuilder { @@ -268,6 +269,7 @@ impl SelectBuilder { connect_by: None, window_before_qualify: false, prewhere: 
None, + select_token: AttachedToken::empty(), }) } fn create_empty() -> Self { @@ -469,6 +471,7 @@ impl TableRelationBuilder { version: self.version.clone(), partitions: self.partitions.clone(), with_ordinality: false, + json_path: None, }) } fn create_empty() -> Self { diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs index ae387d441fa2..a82687533e31 100644 --- a/datafusion/sql/src/unparser/dialect.rs +++ b/datafusion/sql/src/unparser/dialect.rs @@ -18,15 +18,15 @@ use std::sync::Arc; use arrow_schema::TimeUnit; +use datafusion_common::Result; use datafusion_expr::Expr; use regex::Regex; +use sqlparser::tokenizer::Span; use sqlparser::{ ast::{self, BinaryOperator, Function, Ident, ObjectName, TimezoneInfo}, keywords::ALL_KEYWORDS, }; -use datafusion_common::Result; - use super::{utils::character_length_to_sql, utils::date_part_to_sql, Unparser}; /// `Dialect` to use for Unparsing @@ -288,6 +288,7 @@ impl PostgreSqlDialect { name: ObjectName(vec![Ident { value: func_name.to_string(), quote_style: None, + span: Span::empty(), }]), args: ast::FunctionArguments::List(ast::FunctionArgumentList { duplicate_treatment: None, @@ -299,6 +300,7 @@ impl PostgreSqlDialect { over: None, within_group: vec![], parameters: ast::FunctionArguments::None, + uses_odbc_syntax: false, })) } } diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 48dfc425ee63..f09de133b571 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -43,6 +43,8 @@ use datafusion_expr::{ expr::{Alias, Exists, InList, ScalarFunction, Sort, WindowFunction}, Between, BinaryExpr, Case, Cast, Expr, GroupingSet, Like, Operator, TryCast, }; +use sqlparser::ast::helpers::attached_token::AttachedToken; +use sqlparser::tokenizer::Span; /// Convert a DataFusion [`Expr`] to [`ast::Expr`] /// @@ -233,6 +235,7 @@ impl Unparser<'_> { name: ObjectName(vec![Ident { value: func_name.to_string(), quote_style: None, + span: Span::empty(), }]), args: ast::FunctionArguments::List(ast::FunctionArgumentList { duplicate_treatment: None, @@ -244,6 +247,7 @@ impl Unparser<'_> { over, within_group: vec![], parameters: ast::FunctionArguments::None, + uses_odbc_syntax: false, })) } Expr::SimilarTo(Like { @@ -278,6 +282,7 @@ impl Unparser<'_> { name: ObjectName(vec![Ident { value: func_name.to_string(), quote_style: None, + span: Span::empty(), }]), args: ast::FunctionArguments::List(ast::FunctionArgumentList { duplicate_treatment: agg @@ -291,6 +296,7 @@ impl Unparser<'_> { over: None, within_group: vec![], parameters: ast::FunctionArguments::None, + uses_odbc_syntax: false, })) } Expr::ScalarSubquery(subq) => { @@ -404,12 +410,16 @@ impl Unparser<'_> { } // TODO: unparsing wildcard addition options Expr::Wildcard { qualifier, .. 
} => { + let attached_token = AttachedToken::empty(); if let Some(qualifier) = qualifier { let idents: Vec = qualifier.to_vec().into_iter().map(Ident::new).collect(); - Ok(ast::Expr::QualifiedWildcard(ObjectName(idents))) + Ok(ast::Expr::QualifiedWildcard( + ObjectName(idents), + attached_token, + )) } else { - Ok(ast::Expr::Wildcard) + Ok(ast::Expr::Wildcard(attached_token)) } } Expr::GroupingSet(grouping_set) => match grouping_set { @@ -480,6 +490,7 @@ impl Unparser<'_> { name: ObjectName(vec![Ident { value: func_name.to_string(), quote_style: None, + span: Span::empty(), }]), args: ast::FunctionArguments::List(ast::FunctionArgumentList { duplicate_treatment: None, @@ -491,6 +502,7 @@ impl Unparser<'_> { over: None, within_group: vec![], parameters: ast::FunctionArguments::None, + uses_odbc_syntax: false, })) } @@ -709,6 +721,7 @@ impl Unparser<'_> { Ident { value: ident, quote_style, + span: Span::empty(), } } @@ -716,6 +729,7 @@ impl Unparser<'_> { Ident { value: str, quote_style: None, + span: Span::empty(), } } @@ -1481,6 +1495,7 @@ impl Unparser<'_> { name: ObjectName(vec![Ident { value: "UNNEST".to_string(), quote_style: None, + span: Span::empty(), }]), args: ast::FunctionArguments::List(ast::FunctionArgumentList { duplicate_treatment: None, @@ -1492,6 +1507,7 @@ impl Unparser<'_> { over: None, within_group: vec![], parameters: ast::FunctionArguments::None, + uses_odbc_syntax: false, })) } @@ -1672,7 +1688,7 @@ mod tests { let dummy_logical_plan = table_scan(Some("t"), &dummy_schema, None)? .project(vec![Expr::Wildcard { qualifier: None, - options: WildcardOptions::default(), + options: Box::new(WildcardOptions::default()), }])? .filter(col("a").eq(lit(1)))? .build()?; @@ -1864,10 +1880,7 @@ mod tests { (sum(col("a")), r#"sum(a)"#), ( count_udaf() - .call(vec![Expr::Wildcard { - qualifier: None, - options: WildcardOptions::default(), - }]) + .call(vec![wildcard()]) .distinct() .build() .unwrap(), @@ -1875,10 +1888,7 @@ mod tests { ), ( count_udaf() - .call(vec![Expr::Wildcard { - qualifier: None, - options: WildcardOptions::default(), - }]) + .call(vec![wildcard()]) .filter(lit(true)) .build() .unwrap(), diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index e9f9f486ea9a..f2d46a9f4cce 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -44,7 +44,7 @@ use datafusion_expr::{ expr::Alias, BinaryExpr, Distinct, Expr, JoinConstraint, JoinType, LogicalPlan, LogicalPlanBuilder, Operator, Projection, SortExpr, TableScan, Unnest, }; -use sqlparser::ast::{self, Ident, SetExpr}; +use sqlparser::ast::{self, Ident, SetExpr, TableAliasColumnDef}; use std::sync::Arc; /// Convert a DataFusion [`LogicalPlan`] to [`ast::Statement`] @@ -1069,6 +1069,13 @@ impl Unparser<'_> { } fn new_table_alias(&self, alias: String, columns: Vec) -> ast::TableAlias { + let columns = columns + .into_iter() + .map(|ident| TableAliasColumnDef { + name: ident, + data_type: None, + }) + .collect(); ast::TableAlias { name: self.new_ident_quoted_if_needs(alias), columns, diff --git a/datafusion/sql/src/unparser/utils.rs b/datafusion/sql/src/unparser/utils.rs index 354a68f60964..3a7fa5ddcabb 100644 --- a/datafusion/sql/src/unparser/utils.rs +++ b/datafusion/sql/src/unparser/utils.rs @@ -17,6 +17,10 @@ use std::{cmp::Ordering, sync::Arc, vec}; +use super::{ + dialect::CharacterLengthStyle, dialect::DateFieldExtractStyle, + rewrite::TableAliasRewriter, Unparser, +}; use datafusion_common::{ internal_err, tree_node::{Transformed, 
TransformedResult, TreeNode}, @@ -29,11 +33,7 @@ use datafusion_expr::{ use indexmap::IndexSet; use sqlparser::ast; - -use super::{ - dialect::CharacterLengthStyle, dialect::DateFieldExtractStyle, - rewrite::TableAliasRewriter, Unparser, -}; +use sqlparser::tokenizer::Span; /// Recursively searches children of [LogicalPlan] to find an Aggregate node if exists /// prior to encountering a Join, TableScan, or a nested subquery (derived table factor). @@ -426,6 +426,7 @@ pub(crate) fn date_part_to_sql( name: ast::ObjectName(vec![ast::Ident { value: "strftime".to_string(), quote_style: None, + span: Span::empty(), }]), args: ast::FunctionArguments::List(ast::FunctionArgumentList { duplicate_treatment: None, @@ -444,6 +445,7 @@ pub(crate) fn date_part_to_sql( over: None, within_group: vec![], parameters: ast::FunctionArguments::None, + uses_odbc_syntax: false, }))); } } diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 1f6b5f9852ec..476a933c72b7 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -483,10 +483,10 @@ set datafusion.catalog.information_schema = true; statement ok CREATE TABLE t AS SELECT 1::int as i; -statement error Error during planning: SHOW COLUMNS with WHERE or LIKE is not supported +statement error DataFusion error: This feature is not implemented: SHOW COLUMNS with WHERE or LIKE is not supported SHOW COLUMNS FROM t LIKE 'f'; -statement error Error during planning: SHOW COLUMNS with WHERE or LIKE is not supported +statement error DataFusion error: This feature is not implemented: SHOW COLUMNS with WHERE or LIKE is not supported SHOW COLUMNS FROM t WHERE column_name = 'bar'; query TTTTTT
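
The recurring mechanical change in this patch is that sqlparser 0.53 adds a `span` field to `Ident` (and other AST nodes), so every literal `Ident { .. }` construction gains `span: Span::empty()`. A minimal sketch of that pattern, assuming only sqlparser 0.53 on the dependency path (`bare_ident` is an illustrative helper, not part of the patch):

use sqlparser::ast::Ident;
use sqlparser::tokenizer::Span;

/// Build an unquoted identifier with no source location attached.
fn bare_ident(name: &str) -> Ident {
    Ident {
        value: name.to_string(),
        quote_style: None,
        // New in sqlparser 0.53: AST identifiers carry a source span;
        // Span::empty() is the placeholder when no location is known.
        span: Span::empty(),
    }
}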
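
On the DataFusion side, `Expr::Wildcard` now boxes its `WildcardOptions`, and the patch routes most construction sites through the `wildcard()` / `qualified_wildcard()` helpers instead of spelling out the variant. A rough sketch of both styles, assuming the `datafusion_expr` crate at this revision:

use datafusion_expr::expr::WildcardOptions;
use datafusion_expr::{qualified_wildcard, wildcard, Expr};

fn wildcard_exprs() -> Vec<Expr> {
    vec![
        // Preferred after this patch: the helper functions.
        wildcard(),               // SELECT *
        qualified_wildcard("t1"), // SELECT t1.*
        // Building the variant directly now requires boxing the options.
        Expr::Wildcard {
            qualifier: None,
            options: Box::new(WildcardOptions::default()),
        },
    ]
}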
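
Among the DFParser changes, sqlparser 0.53 renames `tokenizer::TokenWithLocation` to `TokenWithSpan`, which the `expected` error helper's signature picks up. A standalone sketch of the same error-reporting shape (the free function `expected_err` is illustrative; in the patch this stays a method on `DFParser`):

use sqlparser::parser::ParserError;
use sqlparser::tokenizer::TokenWithSpan;

/// Report an "expected X, found Y" parse error, mirroring DFParser::expected.
fn expected_err<T>(expected: &str, found: TokenWithSpan) -> Result<T, ParserError> {
    Err(ParserError::ParserError(format!(
        "Expected {expected}, found: {found}"
    )))
}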