Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion spark/src/main/scala/org/apache/comet/serde/strings.scala
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,10 @@ object CometRight extends CometExpressionSerde[Right] {
}
}

object CometConcat extends CometScalarFunction[Concat]("concat") with CometTypeShim {
object CometConcat
extends CometScalarFunction[Concat]("concat")
with CometTypeShim
with CodegenDispatchFallback {
private val unsupportedReason = "CONCAT supports only string input parameters"

// Spark 4.0 widens Concat to accept collated strings and preserves the collation in the merged
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,19 +32,21 @@ SELECT collation('hello' COLLATE UTF8_BINARY)
query
SELECT collation(CAST(NULL AS STRING))

-- concat preserves a non-default collation in its result type, but Comet's native concat produces
-- UTF8_BINARY, so it is Incompatible and falls back to Spark by default.
query expect_fallback(concat does not support non-UTF8_BINARY collations)
-- concat preserves a non-default collation in its result type, which Comet's native concat does
-- not, so concat is Incompatible. It is nevertheless enrolled in the JVM codegen dispatcher, which
-- runs Spark's own doGenCode inside the Comet pipeline, so a collated concat is evaluated within
-- Comet (no fallback to Spark) and matches Spark.
query
SELECT concat('Hello' COLLATE UTF8_LCASE, 'World' COLLATE UTF8_LCASE)

-- reverse is enrolled in the JVM codegen dispatcher, which runs Spark's own doGenCode inside the
-- Comet pipeline, so a collated string is evaluated within Comet (no fallback) and matches Spark.
query
SELECT reverse('Hello' COLLATE UTF8_LCASE)

-- A standard ICU collation (UNICODE_CI) still falls back for concat, confirming the gate covers
-- any non-UTF8_BINARY collation rather than just UTF8_LCASE.
query expect_fallback(concat does not support non-UTF8_BINARY collations)
-- A standard ICU collation (UNICODE_CI) is also routed through the JVM codegen dispatcher and
-- matches Spark, showing the path is not specific to UTF8_LCASE among non-UTF8_BINARY collations.
query
SELECT concat('Hello' COLLATE UNICODE_CI, 'World' COLLATE UNICODE_CI)

query
Expand Down
Loading