Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions docs/source/user-guide/latest/expressions.md
Original file line number Diff line number Diff line change
Expand Up @@ -138,10 +138,10 @@ The tables below list every Spark built-in expression with its current status.
| `array_compact` | ✅ | |
| `array_contains` | ✅ | NaN/signed-zero handling may differ ([details](compatibility/floating-point.md)) |
| `array_distinct` | ✅ | NaN/signed-zero handling may differ ([details](compatibility/floating-point.md)) |
| `array_except` | ✅ | Incompatible; falls back by default ([details](compatibility/expressions/array.md)) |
| `array_except` | ✅ | Routes through the JVM codegen dispatcher by default; the incompatible native path is opt-in via allowIncompatible ([details](compatibility/expressions/array.md)) |
| `array_insert` | ✅ | |
| `array_intersect` | ✅ | Incompatible; falls back by default ([details](compatibility/expressions/array.md)) |
| `array_join` | ✅ | Incompatible; falls back by default ([details](compatibility/expressions/array.md)) |
| `array_intersect` | ✅ | Routes through the JVM codegen dispatcher by default; the incompatible native path is opt-in via allowIncompatible ([details](compatibility/expressions/array.md)) |
| `array_join` | ✅ | Routes through the JVM codegen dispatcher by default; the incompatible native path is opt-in via allowIncompatible ([details](compatibility/expressions/array.md)) |
| `array_max` | ✅ | NaN ordering may differ ([details](compatibility/floating-point.md)) |
| `array_min` | ✅ | NaN ordering may differ ([details](compatibility/floating-point.md)) |
| `array_position` | ✅ | Binary/struct/map/null elements fall back |
Expand Down
12 changes: 9 additions & 3 deletions spark/src/main/scala/org/apache/comet/serde/arrays.scala
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,10 @@ object CometSortArray extends CometExpressionSerde[SortArray] {
}
}

object CometArrayIntersect extends CometExpressionSerde[ArrayIntersect] with CometTypeShim {
object CometArrayIntersect
extends CometExpressionSerde[ArrayIntersect]
with CometTypeShim
with CodegenDispatchFallback {

private val incompatReason: String =
"Result array element order may differ from Spark when the right array is longer " +
Expand Down Expand Up @@ -328,7 +331,10 @@ object CometArrayCompact extends CometExpressionSerde[Expression] {
}
}

object CometArrayExcept extends CometExpressionSerde[ArrayExcept] with CometExprShim {
object CometArrayExcept
extends CometExpressionSerde[ArrayExcept]
with CometExprShim
with CodegenDispatchFallback {

private val incompatReason = "Null handling and ordering may differ from Spark"

Expand Down Expand Up @@ -372,7 +378,7 @@ object CometArrayExcept extends CometExpressionSerde[ArrayExcept] with CometExpr
}
}

object CometArrayJoin extends CometExpressionSerde[ArrayJoin] {
object CometArrayJoin extends CometExpressionSerde[ArrayJoin] with CodegenDispatchFallback {

private val incompatReason = "Null handling may differ from Spark"

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership. The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the License is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.

-- ArrayExcept mixes in CodegenDispatchFallback. When allowIncompatible is unset, its Incompatible
-- null-handling/ordering case therefore routes through the JVM codegen dispatcher and matches
-- Spark exactly, including the literal/literal case that the native path could not handle.

-- Fixture table: two array<int> columns, stored as parquet.

statement
CREATE TABLE test_ae_dispatch(a array<int>, b array<int>) USING parquet

-- Fixture rows, in order: overlapping arrays, empty right array, empty left array, NULL left
-- array, and NULL elements on both sides.

statement
INSERT INTO test_ae_dispatch VALUES (array(1, 2, 3), array(2, 3, 4)), (array(1, 2), array()), (array(), array(1)), (NULL, array(1)), (array(1, NULL), array(NULL))

-- Column/column case: array_except evaluated over every fixture row.

query
SELECT array_except(a, b) FROM test_ae_dispatch

-- Literal/literal case: the first four fixture rows repeated as literal arguments.
-- NOTE(review): the NULL-element row (array(1, NULL), array(NULL)) has no literal counterpart
-- here; confirm whether that omission is intentional.

query
SELECT array_except(array(1, 2, 3), array(2, 3, 4)), array_except(array(1, 2), array()), array_except(array(), array(1)), array_except(cast(NULL as array<int>), array(1))
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership. The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the License is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.

-- ArrayIntersect mixes in CodegenDispatchFallback. When allowIncompatible is unset, its
-- Incompatible element-order case therefore routes through the JVM codegen dispatcher and
-- matches Spark exactly, including the case where the right array is longer than the left, which
-- the native path orders differently (so no sort_array workaround is needed here).

-- Fixture table: two array<int> columns, stored as parquet.

statement
CREATE TABLE test_ai_dispatch(a array<int>, b array<int>) USING parquet

-- Fixture rows, in order: two rows whose right array is longer than the left, one row with NULL
-- elements on both sides, and one row with a NULL left array.

statement
INSERT INTO test_ai_dispatch VALUES (array(2, 1), array(3, 1, 2)), (array(3, 1), array(1, 2, 3, 4)), (array(1, NULL), array(NULL, 2)), (NULL, array(1))

-- Column/column case: array_intersect evaluated over every fixture row.

query
SELECT array_intersect(a, b) FROM test_ai_dispatch

-- Literal/literal case: the two right-longer-than-left fixture rows repeated as literals.
-- NOTE(review): the NULL-element row and the NULL-array row have no literal counterparts here;
-- confirm whether that omission is intentional.

query
SELECT array_intersect(array(2, 1), array(3, 1, 2)), array_intersect(array(3, 1), array(1, 2, 3, 4))
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ CREATE TABLE test_array_join(arr array<string>) USING parquet
statement
INSERT INTO test_array_join VALUES (array('a', 'b', 'c')), (array('hello', 'world')), (array()), (NULL), (array('a', NULL, 'c'))

query spark_answer_only
query
SELECT array_join(arr, ',') FROM test_array_join

query spark_answer_only
query
SELECT array_join(arr, ',', 'NULL') FROM test_array_join
Loading