From f4180a83d054a05f2bd24de31c6efc32bf5233b2 Mon Sep 17 00:00:00 2001 From: Your Name Date: Tue, 17 Feb 2026 23:40:15 +0800 Subject: [PATCH 1/3] Native engine crashes on concat_ws with literal NULL separator --- .../apache/comet/serde/QueryPlanSerde.scala | 2 +- .../scala/org/apache/comet/serde/strings.scala | 18 +++++++++++++++++- .../sql-tests/expressions/string/concat_ws.sql | 2 +- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala index f627b0c465..6f399fa9fd 100644 --- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala +++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala @@ -149,7 +149,7 @@ object QueryPlanSerde extends Logging with CometExprShim { classOf[Ascii] -> CometScalarFunction("ascii"), classOf[BitLength] -> CometScalarFunction("bit_length"), classOf[Chr] -> CometScalarFunction("char"), - classOf[ConcatWs] -> CometScalarFunction("concat_ws"), + classOf[ConcatWs] -> CometConcatWs, classOf[Concat] -> CometConcat, classOf[Contains] -> CometScalarFunction("contains"), classOf[EndsWith] -> CometScalarFunction("ends_with"), diff --git a/spark/src/main/scala/org/apache/comet/serde/strings.scala b/spark/src/main/scala/org/apache/comet/serde/strings.scala index db60709007..746721d098 100644 --- a/spark/src/main/scala/org/apache/comet/serde/strings.scala +++ b/spark/src/main/scala/org/apache/comet/serde/strings.scala @@ -21,7 +21,7 @@ package org.apache.comet.serde import java.util.Locale -import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Concat, Expression, If, InitCap, IsNull, Left, Length, Like, Literal, Lower, RegExpReplace, Right, RLike, StringLPad, StringRepeat, StringRPad, StringSplit, Substring, Upper} +import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Concat, ConcatWs, Expression, If, InitCap, IsNull, Left, Length, Like, Literal, Lower, RegExpReplace, Right, RLike, StringLPad, StringRepeat, StringRPad, StringSplit, Substring, Upper} import org.apache.spark.sql.types.{BinaryType, DataTypes, LongType, StringType} import org.apache.spark.unsafe.types.UTF8String @@ -199,6 +199,22 @@ object CometConcat extends CometScalarFunction[Concat]("concat") { } } +object CometConcatWs extends CometExpressionSerde[ConcatWs] { + + override def convert(expr: ConcatWs, inputs: Seq[Attribute], binding: Boolean): Option[Expr] = { + expr.children.headOption match { + // Match Spark behavior: when the separator is NULL, the result of concat_ws is NULL. + case Some(Literal(null, _)) => + val nullLiteral = Literal.create(null, expr.dataType) + exprToProtoInternal(nullLiteral, inputs, binding) + + case _ => + // For all other cases, use the generic scalar function implementation. + CometScalarFunction[ConcatWs]("concat_ws").convert(expr, inputs, binding) + } + } +} + object CometLike extends CometExpressionSerde[Like] { override def convert(expr: Like, inputs: Seq[Attribute], binding: Boolean): Option[Expr] = { diff --git a/spark/src/test/resources/sql-tests/expressions/string/concat_ws.sql b/spark/src/test/resources/sql-tests/expressions/string/concat_ws.sql index 4a3df68965..a94ec764fa 100644 --- a/spark/src/test/resources/sql-tests/expressions/string/concat_ws.sql +++ b/spark/src/test/resources/sql-tests/expressions/string/concat_ws.sql @@ -43,5 +43,5 @@ query SELECT concat_ws(' ', first_name, middle_initial, last_name) FROM names -- literal + literal + literal -query ignore(https://github.com/apache/datafusion-comet/issues/3339) +query SELECT concat_ws(',', 'hello', 'world'), concat_ws(',', '', ''), concat_ws(',', NULL, 'b', 'c'), concat_ws(NULL, 'a', 'b') From c45017903193fe93d2458280273117af5be87cae Mon Sep 17 00:00:00 2001 From: ChenChen Lai <72776271+0lai0@users.noreply.github.com> Date: Wed, 18 Feb 2026 17:26:37 +0800 Subject: [PATCH 2/3] Update spark/src/test/resources/sql-tests/expressions/string/concat_ws.sql Co-authored-by: B Vadlamani <11091419+coderfender@users.noreply.github.com> --- .../test/resources/sql-tests/expressions/string/concat_ws.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spark/src/test/resources/sql-tests/expressions/string/concat_ws.sql b/spark/src/test/resources/sql-tests/expressions/string/concat_ws.sql index a94ec764fa..4a3df68965 100644 --- a/spark/src/test/resources/sql-tests/expressions/string/concat_ws.sql +++ b/spark/src/test/resources/sql-tests/expressions/string/concat_ws.sql @@ -43,5 +43,5 @@ query SELECT concat_ws(' ', first_name, middle_initial, last_name) FROM names -- literal + literal + literal -query +query ignore(https://github.com/apache/datafusion-comet/issues/3339) SELECT concat_ws(',', 'hello', 'world'), concat_ws(',', '', ''), concat_ws(',', NULL, 'b', 'c'), concat_ws(NULL, 'a', 'b') From 405a886d4e3d84e666b75bfa1a8b1fabc257ff3b Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 18 Feb 2026 06:59:00 -0700 Subject: [PATCH 3/3] fix: fall back to Spark for concat_ws with all foldable args Co-Authored-By: Claude Opus 4.6 --- spark/src/main/scala/org/apache/comet/serde/strings.scala | 5 +++++ .../resources/sql-tests/expressions/string/concat_ws.sql | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/spark/src/main/scala/org/apache/comet/serde/strings.scala b/spark/src/main/scala/org/apache/comet/serde/strings.scala index 746721d098..871efd3702 100644 --- a/spark/src/main/scala/org/apache/comet/serde/strings.scala +++ b/spark/src/main/scala/org/apache/comet/serde/strings.scala @@ -208,6 +208,11 @@ object CometConcatWs extends CometExpressionSerde[ConcatWs] { val nullLiteral = Literal.create(null, expr.dataType) exprToProtoInternal(nullLiteral, inputs, binding) + case _ if expr.children.forall(_.foldable) => + // Fall back to Spark for all-literal args so ConstantFolding can handle it + withInfo(expr, "all arguments are foldable") + None + case _ => // For all other cases, use the generic scalar function implementation. CometScalarFunction[ConcatWs]("concat_ws").convert(expr, inputs, binding) diff --git a/spark/src/test/resources/sql-tests/expressions/string/concat_ws.sql b/spark/src/test/resources/sql-tests/expressions/string/concat_ws.sql index 4a3df68965..81ccfb0f36 100644 --- a/spark/src/test/resources/sql-tests/expressions/string/concat_ws.sql +++ b/spark/src/test/resources/sql-tests/expressions/string/concat_ws.sql @@ -42,6 +42,6 @@ INSERT INTO names VALUES(1, 'James', 'B', 'Taylor'), (2, 'Smith', 'C', 'Davis'), query SELECT concat_ws(' ', first_name, middle_initial, last_name) FROM names --- literal + literal + literal -query ignore(https://github.com/apache/datafusion-comet/issues/3339) +-- literal + literal + literal (falls back to Spark when all args are foldable) +query spark_answer_only SELECT concat_ws(',', 'hello', 'world'), concat_ws(',', '', ''), concat_ws(',', NULL, 'b', 'c'), concat_ws(NULL, 'a', 'b')