Skip to content

Commit

Permalink
fix: disable initCap by default (#1276)
Browse files Browse the repository at this point in the history
* fix: disable initCap by default

* Update spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala

Co-authored-by: Andy Grove <[email protected]>

* address review comments

---------

Co-authored-by: Andy Grove <[email protected]>
  • Loading branch information
kazuyukitanimura and andygrove authored Jan 14, 2025
1 parent 1eb932a commit 9fe5420
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 6 deletions.
2 changes: 2 additions & 0 deletions common/src/main/scala/org/apache/comet/CometConf.scala
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,8 @@ object CometConf extends ShimCometConf {
createExecEnabledConfig("window", defaultValue = true)
val COMET_EXEC_TAKE_ORDERED_AND_PROJECT_ENABLED: ConfigEntry[Boolean] =
createExecEnabledConfig("takeOrderedAndProject", defaultValue = true)
val COMET_EXEC_INITCAP_ENABLED: ConfigEntry[Boolean] =
createExecEnabledConfig("initCap", defaultValue = false)

val COMET_EXEC_SORT_MERGE_JOIN_WITH_JOIN_FILTER_ENABLED: ConfigEntry[Boolean] =
conf("spark.comet.exec.sortMergeJoinWithJoinFilter.enabled")
Expand Down
1 change: 1 addition & 0 deletions docs/source/user-guide/configs.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ Comet provides the following configuration settings.
| spark.comet.exec.filter.enabled | Whether to enable filter by default. | true |
| spark.comet.exec.globalLimit.enabled | Whether to enable globalLimit by default. | true |
| spark.comet.exec.hashJoin.enabled | Whether to enable hashJoin by default. | true |
| spark.comet.exec.initCap.enabled | Whether to enable initCap by default. | false |
| spark.comet.exec.localLimit.enabled | Whether to enable localLimit by default. | true |
| spark.comet.exec.memoryPool | The type of memory pool to be used for Comet native execution. Available memory pool types are 'greedy', 'fair_spill', 'greedy_task_shared', 'fair_spill_task_shared', 'greedy_global' and 'fair_spill_global', By default, this config is 'greedy_task_shared'. | greedy_task_shared |
| spark.comet.exec.project.enabled | Whether to enable project by default. | true |
Expand Down
17 changes: 13 additions & 4 deletions spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1795,10 +1795,19 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde with CometExprShim
optExprWithInfo(optExpr, expr, child)

case InitCap(child) =>
val castExpr = Cast(child, StringType)
val childExpr = exprToProtoInternal(castExpr, inputs)
val optExpr = scalarExprToProto("initcap", childExpr)
optExprWithInfo(optExpr, expr, castExpr)
if (CometConf.COMET_EXEC_INITCAP_ENABLED.get()) {
val castExpr = Cast(child, StringType)
val childExpr = exprToProtoInternal(castExpr, inputs)
val optExpr = scalarExprToProto("initcap", childExpr)
optExprWithInfo(optExpr, expr, castExpr)
} else {
withInfo(
expr,
"Comet initCap is not compatible with Spark yet. " +
"See https://github.com/apache/datafusion-comet/issues/1052 ." +
s"Set ${CometConf.COMET_EXEC_INITCAP_ENABLED.key}=true to enable it anyway.")
None
}

case Length(child) =>
val castExpr = Cast(child, StringType)
Expand Down
10 changes: 8 additions & 2 deletions spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1364,8 +1364,14 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
sql(s"create table $table(id int, name varchar(20)) using parquet")
sql(
s"insert into $table values(1, 'james smith'), (2, 'michael rose'), " +
"(3, 'robert williams'), (4, 'rames rose'), (5, 'james smith')")
checkSparkAnswerAndOperator(s"SELECT initcap(name) FROM $table")
"(3, 'robert williams'), (4, 'rames rose'), (5, 'james smith'), " +
"(6, 'robert rose-smith'), (7, 'james ähtäri')")
if (CometConf.COMET_EXEC_INITCAP_ENABLED.get()) {
// TODO: remove this if clause https://github.com/apache/datafusion-comet/issues/1052
checkSparkAnswerAndOperator(s"SELECT initcap(name) FROM $table")
} else {
checkSparkAnswer(s"SELECT initcap(name) FROM $table")
}
}
}
}
Expand Down

0 comments on commit 9fe5420

Please sign in to comment.