From 7e44a72e871a4ab6fb0a9fc3c2ea598e366aea60 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 12 Jun 2026 07:38:21 -0600 Subject: [PATCH] feat: opt from_utc_timestamp, to_utc_timestamp, convert_timezone into codegen dispatch Mixes CodegenDispatchFallback into the three timezone serdes so their Incompatible result (the native parser rejects legacy zone forms like PST / GMT+1, #2013) routes through the JVM codegen dispatcher and matches Spark exactly, including those forms, instead of falling back. The native path stays opt-in via allowIncompatible. Part of #4596. --- docs/source/user-guide/latest/expressions.md | 6 +-- .../org/apache/comet/serde/datetime.scala | 12 +++-- .../datetime/convert_timezone_dispatch.sql | 40 +++++++++++++++++ .../datetime/from_utc_timestamp_dispatch.sql | 45 +++++++++++++++++++ .../datetime/to_utc_timestamp_dispatch.sql | 44 ++++++++++++++++++ 5 files changed, 141 insertions(+), 6 deletions(-) create mode 100644 spark/src/test/resources/sql-tests/expressions/datetime/convert_timezone_dispatch.sql create mode 100644 spark/src/test/resources/sql-tests/expressions/datetime/from_utc_timestamp_dispatch.sql create mode 100644 spark/src/test/resources/sql-tests/expressions/datetime/to_utc_timestamp_dispatch.sql diff --git a/docs/source/user-guide/latest/expressions.md b/docs/source/user-guide/latest/expressions.md index 899fe4b728..0fb23ff7ac 100644 --- a/docs/source/user-guide/latest/expressions.md +++ b/docs/source/user-guide/latest/expressions.md @@ -230,7 +230,7 @@ The type-name conversion functions (`bigint`, `binary`, `boolean`, `date`, `deci | Function | Status | Notes | | --- | --- | --- | | `add_months` | ✅ | | -| `convert_timezone` | ✅ | | +| `convert_timezone` | ✅ | Routes through the JVM codegen dispatcher by default (handles all timezone forms); the native path is opt-in via allowIncompatible ([details](compatibility/expressions/datetime.md)) | | `curdate` | ✅ | Constant-folded to a literal (alias of `current_date`) | | `current_date` | ✅ | Constant-folded to a literal before Comet sees the plan | | `current_time` | 🔜 | Blocked on Spark 4.1 TIME type support ([#4288](https://github.com/apache/datafusion-comet/issues/4288)) | @@ -253,7 +253,7 @@ The type-name conversion functions (`bigint`, `binary`, `boolean`, `date`, `deci | `dayofyear` | ✅ | | | `extract` | ✅ | | | `from_unixtime` | ✅ | | -| `from_utc_timestamp` | ✅ | Legacy zone forms fall back (Incompatible) ([details](compatibility/expressions/datetime.md)) | +| `from_utc_timestamp` | ✅ | Routes through the JVM codegen dispatcher by default (handles all timezone forms); the native path is opt-in via allowIncompatible ([details](compatibility/expressions/datetime.md)) | | `hour` | ✅ | | | `last_day` | ✅ | | | `localtimestamp` | ✅ | | @@ -285,7 +285,7 @@ The type-name conversion functions (`bigint`, `binary`, `boolean`, `date`, `deci | `to_timestamp_ltz` | ✅ | Rewrites to `to_timestamp` (`TimestampType`) | | `to_timestamp_ntz` | ✅ | Rewrites to `to_timestamp` (`TimestampNTZType`) | | `to_unix_timestamp` | ✅ | | -| `to_utc_timestamp` | ✅ | Legacy zone forms fall back (Incompatible) ([details](compatibility/expressions/datetime.md)) | +| `to_utc_timestamp` | ✅ | Routes through the JVM codegen dispatcher by default (handles all timezone forms); the native path is opt-in via allowIncompatible ([details](compatibility/expressions/datetime.md)) | | `trunc` | ✅ | | | `try_make_interval` | 🔜 | Produces legacy CalendarInterval; tracked by [#4540](https://github.com/apache/datafusion-comet/issues/4540) | | `try_make_timestamp` | ✅ | | diff --git a/spark/src/main/scala/org/apache/comet/serde/datetime.scala b/spark/src/main/scala/org/apache/comet/serde/datetime.scala index 5a3a76cc3e..2ce75ccc0d 100644 --- a/spark/src/main/scala/org/apache/comet/serde/datetime.scala +++ b/spark/src/main/scala/org/apache/comet/serde/datetime.scala @@ -357,7 +357,9 @@ private object UTCTimestampSerde { " execution time. See https://github.com/apache/datafusion-comet/issues/2013." } -object CometFromUTCTimestamp extends CometExpressionSerde[FromUTCTimestamp] { +object CometFromUTCTimestamp + extends CometExpressionSerde[FromUTCTimestamp] + with CodegenDispatchFallback { override def getSupportLevel(expr: FromUTCTimestamp): SupportLevel = Incompatible(Some(UTCTimestampSerde.tzParseIncompatReason)) @@ -375,7 +377,9 @@ object CometFromUTCTimestamp extends CometExpressionSerde[FromUTCTimestamp] { } } -object CometToUTCTimestamp extends CometExpressionSerde[ToUTCTimestamp] { +object CometToUTCTimestamp + extends CometExpressionSerde[ToUTCTimestamp] + with CodegenDispatchFallback { override def getSupportLevel(expr: ToUTCTimestamp): SupportLevel = Incompatible(Some(UTCTimestampSerde.tzParseIncompatReason)) @@ -393,7 +397,9 @@ object CometToUTCTimestamp extends CometExpressionSerde[ToUTCTimestamp] { } } -object CometConvertTimezone extends CometExpressionSerde[ConvertTimezone] { +object CometConvertTimezone + extends CometExpressionSerde[ConvertTimezone] + with CodegenDispatchFallback { override def getSupportLevel(expr: ConvertTimezone): SupportLevel = Incompatible(Some(UTCTimestampSerde.tzParseIncompatReason)) diff --git a/spark/src/test/resources/sql-tests/expressions/datetime/convert_timezone_dispatch.sql b/spark/src/test/resources/sql-tests/expressions/datetime/convert_timezone_dispatch.sql new file mode 100644 index 0000000000..9835bd23f8 --- /dev/null +++ b/spark/src/test/resources/sql-tests/expressions/datetime/convert_timezone_dispatch.sql @@ -0,0 +1,40 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +-- CometConvertTimezone mixes in CodegenDispatchFallback, so with allowIncompatible unset it routes +-- through the JVM codegen dispatcher and matches Spark exactly, including the legacy timezone forms +-- the native parser rejects. Verified across two session zones. +-- ConfigMatrix: spark.sql.session.timeZone=UTC,America/Los_Angeles + +statement +CREATE TABLE test_ct_dispatch(ts timestamp_ntz, src string, tgt string) USING parquet + +statement +INSERT INTO test_ct_dispatch VALUES + (timestamp_ntz'2021-12-06 08:00:00', 'UTC', 'America/Los_Angeles'), + (timestamp_ntz'2021-07-01 12:00:00', 'America/New_York', 'Asia/Tokyo'), + (NULL, 'UTC', 'Asia/Tokyo') + +query +SELECT convert_timezone(src, tgt, ts) FROM test_ct_dispatch + +query +SELECT convert_timezone('UTC', 'America/Los_Angeles', timestamp_ntz'2021-12-06 08:00:00') + +-- legacy forms the native parser rejects but the dispatcher handles +query +SELECT convert_timezone('GMT+1', 'PST', timestamp_ntz'2021-12-06 08:00:00') diff --git a/spark/src/test/resources/sql-tests/expressions/datetime/from_utc_timestamp_dispatch.sql b/spark/src/test/resources/sql-tests/expressions/datetime/from_utc_timestamp_dispatch.sql new file mode 100644 index 0000000000..d00015e705 --- /dev/null +++ b/spark/src/test/resources/sql-tests/expressions/datetime/from_utc_timestamp_dispatch.sql @@ -0,0 +1,45 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +-- CometFromUTCTimestamp mixes in CodegenDispatchFallback, so with allowIncompatible unset it +-- routes through the JVM codegen dispatcher (Spark's own doGenCode) and matches Spark exactly, +-- including the legacy timezone forms the native parser rejects (e.g. PST, GMT+1). Verified across +-- two session zones to confirm the resolved timeZoneId survives closure serialization. +-- ConfigMatrix: spark.sql.session.timeZone=UTC,America/Los_Angeles + +statement +CREATE TABLE test_futc_dispatch(ts timestamp) USING parquet + +statement +INSERT INTO test_futc_dispatch VALUES + (timestamp('2015-07-24 00:00:00')), + (timestamp('2015-01-24 00:00:00')), + (timestamp('1969-12-31 23:59:59')), + (NULL) + +query +SELECT from_utc_timestamp(ts, 'America/Los_Angeles') FROM test_futc_dispatch + +query +SELECT from_utc_timestamp(ts, '+02:00') FROM test_futc_dispatch + +-- legacy forms the native parser rejects but the dispatcher handles +query +SELECT from_utc_timestamp(ts, 'PST') FROM test_futc_dispatch + +query +SELECT from_utc_timestamp(timestamp('2017-07-14 02:40:00'), 'GMT+1'), from_utc_timestamp(timestamp('2017-07-14 02:40:00'), 'Etc/GMT-1') diff --git a/spark/src/test/resources/sql-tests/expressions/datetime/to_utc_timestamp_dispatch.sql b/spark/src/test/resources/sql-tests/expressions/datetime/to_utc_timestamp_dispatch.sql new file mode 100644 index 0000000000..a461d6c9e8 --- /dev/null +++ b/spark/src/test/resources/sql-tests/expressions/datetime/to_utc_timestamp_dispatch.sql @@ -0,0 +1,44 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +-- CometToUTCTimestamp mixes in CodegenDispatchFallback, so with allowIncompatible unset it routes +-- through the JVM codegen dispatcher and matches Spark exactly, including the legacy timezone forms +-- the native parser rejects. Verified across two session zones. +-- ConfigMatrix: spark.sql.session.timeZone=UTC,America/Los_Angeles + +statement +CREATE TABLE test_tutc_dispatch(ts timestamp) USING parquet + +statement +INSERT INTO test_tutc_dispatch VALUES + (timestamp('2015-07-24 00:00:00')), + (timestamp('2015-01-24 00:00:00')), + (timestamp('1969-12-31 23:59:59')), + (NULL) + +query +SELECT to_utc_timestamp(ts, 'America/Los_Angeles') FROM test_tutc_dispatch + +query +SELECT to_utc_timestamp(ts, '+02:00') FROM test_tutc_dispatch + +-- legacy forms the native parser rejects but the dispatcher handles +query +SELECT to_utc_timestamp(ts, 'PST') FROM test_tutc_dispatch + +query +SELECT to_utc_timestamp(timestamp('2017-07-14 02:40:00'), 'GMT+1'), to_utc_timestamp(timestamp('2017-07-14 02:40:00'), 'Etc/GMT-1')