From f34dee18a50764da842abd7dc52f47349f2d2814 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 22 Dec 2025 14:47:34 -0700 Subject: [PATCH 1/6] chore: upgrade Spark 4.x support from 4.0.1 to 4.1.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update the spark-4.0 profile to use Spark 4.1.0 and add shims for API changes: - Sum.evalMode -> Sum.evalContext.evalMode (SPARK-53968) - BinaryOutputStyle config now returns enum directly - MapStatus.apply() requires new checksumVal parameter (SPARK-51756) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .github/workflows/spark_sql_test.yml | 4 +-- dev/release/create-tarball.sh | 2 +- docs/source/user-guide/latest/installation.md | 2 +- pom.xml | 2 +- .../CometBypassMergeSortShuffleWriter.java | 7 +++-- .../shuffle/CometUnsafeShuffleWriter.java | 5 ++-- .../org/apache/comet/serde/aggregates.scala | 7 +++-- .../org/apache/comet/shims/CometAggShim.scala | 27 +++++++++++++++++ .../spark/sql/comet/shims/ShimMapStatus.scala | 29 +++++++++++++++++++ .../org/apache/comet/shims/CometAggShim.scala | 27 +++++++++++++++++ .../apache/comet/shims/CometExprShim.scala | 4 +-- .../spark/sql/comet/shims/ShimMapStatus.scala | 29 +++++++++++++++++++ 12 files changed, 129 insertions(+), 16 deletions(-) create mode 100644 spark/src/main/spark-3.x/org/apache/comet/shims/CometAggShim.scala create mode 100644 spark/src/main/spark-3.x/org/apache/spark/sql/comet/shims/ShimMapStatus.scala create mode 100644 spark/src/main/spark-4.0/org/apache/comet/shims/CometAggShim.scala create mode 100644 spark/src/main/spark-4.0/org/apache/spark/sql/comet/shims/ShimMapStatus.scala diff --git a/.github/workflows/spark_sql_test.yml b/.github/workflows/spark_sql_test.yml index 3d7aa2e2f9..09b85515ea 100644 --- a/.github/workflows/spark_sql_test.yml +++ b/.github/workflows/spark_sql_test.yml @@ -50,7 +50,7 @@ jobs: strategy: matrix: os: [ubuntu-24.04] - spark-version: [{short: '3.4', full: '3.4.3', java: 11}, {short: '3.5', full: '3.5.7', java: 11}, {short: '4.0', full: '4.0.1', java: 17}] + spark-version: [{short: '3.4', full: '3.4.3', java: 11}, {short: '3.5', full: '3.5.7', java: 11}, {short: '4.0', full: '4.1.0', java: 17}] module: - {name: "catalyst", args1: "catalyst/test", args2: ""} - {name: "sql_core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest} @@ -61,7 +61,7 @@ jobs: - {name: "sql_hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest"} # Skip sql_hive-1 for Spark 4.0 due to https://github.com/apache/datafusion-comet/issues/2946 exclude: - - spark-version: {short: '4.0', full: '4.0.1', java: 17} + - spark-version: {short: '4.0', full: '4.1.0', java: 17} module: {name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"} fail-fast: false name: spark-sql-${{ matrix.module.name }}/${{ matrix.os }}/spark-${{ matrix.spark-version.full }}/java-${{ matrix.spark-version.java }} diff --git a/dev/release/create-tarball.sh b/dev/release/create-tarball.sh index 6387cf485a..8fe5af0607 100755 --- a/dev/release/create-tarball.sh +++ b/dev/release/create-tarball.sh @@ -110,7 +110,7 @@ echo "---------------------------------------------------------" # create containing the files in git at $release_hash -# the files in the tarball are prefixed with {version} (e.g. 4.0.1) +# the files in the tarball are prefixed with {version} (e.g. 4.1.0) mkdir -p ${distdir} (cd "${DEV_RELEASE_TOP_DIR}" && git archive ${release_hash} --prefix ${release}/ | gzip > ${tarball}) diff --git a/docs/source/user-guide/latest/installation.md b/docs/source/user-guide/latest/installation.md index d74fee36b8..c250c98aed 100644 --- a/docs/source/user-guide/latest/installation.md +++ b/docs/source/user-guide/latest/installation.md @@ -49,7 +49,7 @@ use only and should not be used in production yet. | Spark Version | Java Version | Scala Version | Comet Tests in CI | Spark SQL Tests in CI | | ------------- | ------------ | ------------- | ----------------- | --------------------- | -| 4.0.1 | 17 | 2.13 | Yes | Yes | +| 4.1.0 | 17 | 2.13 | Yes | Yes | Note that Comet may not fully work with proprietary forks of Apache Spark such as the Spark versions offered by Cloud Service Providers. diff --git a/pom.xml b/pom.xml index ab42aa773e..4e7848ae99 100644 --- a/pom.xml +++ b/pom.xml @@ -637,7 +637,7 @@ under the License. 2.13.16 2.13 - 4.0.1 + 4.1.0 4.0 1.15.2 4.13.6 diff --git a/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/CometBypassMergeSortShuffleWriter.java b/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/CometBypassMergeSortShuffleWriter.java index a58ec7851b..6dc3fd6d7c 100644 --- a/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/CometBypassMergeSortShuffleWriter.java +++ b/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/CometBypassMergeSortShuffleWriter.java @@ -41,7 +41,6 @@ import org.apache.spark.memory.TaskMemoryManager; import org.apache.spark.network.shuffle.checksum.ShuffleChecksumHelper; import org.apache.spark.scheduler.MapStatus; -import org.apache.spark.scheduler.MapStatus$; import org.apache.spark.serializer.SerializerInstance; import org.apache.spark.shuffle.ShuffleWriteMetricsReporter; import org.apache.spark.shuffle.ShuffleWriter; @@ -54,6 +53,7 @@ import org.apache.spark.shuffle.comet.CometShuffleMemoryAllocatorTrait; import org.apache.spark.shuffle.sort.CometShuffleExternalSorter; import org.apache.spark.sql.catalyst.expressions.UnsafeRow; +import org.apache.spark.sql.comet.shims.ShimMapStatus$; import org.apache.spark.sql.types.StructType; import org.apache.spark.storage.BlockManager; import org.apache.spark.storage.FileSegment; @@ -172,7 +172,7 @@ public void write(Iterator> records) throws IOException { .commitAllPartitions(ShuffleChecksumHelper.EMPTY_CHECKSUM_VALUE) .getPartitionLengths(); mapStatus = - MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths, mapId); + ShimMapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths, mapId); return; } final long openStartTime = System.nanoTime(); @@ -261,7 +261,8 @@ public void write(Iterator> records) throws IOException { // TODO: We probably can move checksum generation here when concatenating partition files partitionLengths = writePartitionedData(mapOutputWriter); - mapStatus = MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths, mapId); + mapStatus = + ShimMapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths, mapId); } catch (Exception e) { try { mapOutputWriter.abort(e); diff --git a/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/CometUnsafeShuffleWriter.java b/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/CometUnsafeShuffleWriter.java index a845e743d4..d736512a86 100644 --- a/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/CometUnsafeShuffleWriter.java +++ b/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/CometUnsafeShuffleWriter.java @@ -50,7 +50,6 @@ import org.apache.spark.network.shuffle.checksum.ShuffleChecksumHelper; import org.apache.spark.network.util.LimitedInputStream; import org.apache.spark.scheduler.MapStatus; -import org.apache.spark.scheduler.MapStatus$; import org.apache.spark.serializer.SerializationStream; import org.apache.spark.serializer.SerializerInstance; import org.apache.spark.shuffle.BaseShuffleHandle; @@ -67,6 +66,7 @@ import org.apache.spark.shuffle.sort.SortShuffleManager; import org.apache.spark.shuffle.sort.UnsafeShuffleWriter; import org.apache.spark.sql.catalyst.expressions.UnsafeRow; +import org.apache.spark.sql.comet.shims.ShimMapStatus$; import org.apache.spark.sql.types.StructType; import org.apache.spark.storage.BlockManager; import org.apache.spark.storage.TimeTrackingOutputStream; @@ -288,7 +288,8 @@ void closeAndWriteOutput() throws IOException { } } } - mapStatus = MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths, mapId); + mapStatus = + ShimMapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths, mapId); } @VisibleForTesting diff --git a/spark/src/main/scala/org/apache/comet/serde/aggregates.scala b/spark/src/main/scala/org/apache/comet/serde/aggregates.scala index 8ab568dc83..7e3e978d8f 100644 --- a/spark/src/main/scala/org/apache/comet/serde/aggregates.scala +++ b/spark/src/main/scala/org/apache/comet/serde/aggregates.scala @@ -30,7 +30,7 @@ import org.apache.comet.CometConf import org.apache.comet.CometConf.COMET_EXEC_STRICT_FLOATING_POINT import org.apache.comet.CometSparkSessionExtensions.withInfo import org.apache.comet.serde.QueryPlanSerde.{evalModeToProto, exprToProto, serializeDataType} -import org.apache.comet.shims.CometEvalModeUtil +import org.apache.comet.shims.{CometAggShim, CometEvalModeUtil} object CometMin extends CometAggregateExpressionSerde[Min] { @@ -214,7 +214,7 @@ object CometAverage extends CometAggregateExpressionSerde[Average] { object CometSum extends CometAggregateExpressionSerde[Sum] { override def getSupportLevel(sum: Sum): SupportLevel = { - sum.evalMode match { + CometAggShim.getSumEvalMode(sum) match { case EvalMode.ANSI if !sum.dataType.isInstanceOf[DecimalType] => Incompatible(Some("ANSI mode for non decimal inputs is not supported")) case EvalMode.TRY if !sum.dataType.isInstanceOf[DecimalType] => @@ -243,7 +243,8 @@ object CometSum extends CometAggregateExpressionSerde[Sum] { val builder = ExprOuterClass.Sum.newBuilder() builder.setChild(childExpr.get) builder.setDatatype(dataType.get) - builder.setEvalMode(evalModeToProto(CometEvalModeUtil.fromSparkEvalMode(sum.evalMode))) + val evalMode = CometEvalModeUtil.fromSparkEvalMode(CometAggShim.getSumEvalMode(sum)) + builder.setEvalMode(evalModeToProto(evalMode)) Some( ExprOuterClass.AggExpr diff --git a/spark/src/main/spark-3.x/org/apache/comet/shims/CometAggShim.scala b/spark/src/main/spark-3.x/org/apache/comet/shims/CometAggShim.scala new file mode 100644 index 0000000000..2c311a6373 --- /dev/null +++ b/spark/src/main/spark-3.x/org/apache/comet/shims/CometAggShim.scala @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.comet.shims + +import org.apache.spark.sql.catalyst.expressions.EvalMode +import org.apache.spark.sql.catalyst.expressions.aggregate.Sum + +object CometAggShim { + def getSumEvalMode(sum: Sum): EvalMode.Value = sum.evalMode +} diff --git a/spark/src/main/spark-3.x/org/apache/spark/sql/comet/shims/ShimMapStatus.scala b/spark/src/main/spark-3.x/org/apache/spark/sql/comet/shims/ShimMapStatus.scala new file mode 100644 index 0000000000..280a086602 --- /dev/null +++ b/spark/src/main/spark-3.x/org/apache/spark/sql/comet/shims/ShimMapStatus.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.spark.sql.comet.shims + +import org.apache.spark.scheduler.MapStatus +import org.apache.spark.storage.BlockManagerId + +object ShimMapStatus { + def apply(loc: BlockManagerId, uncompressedSizes: Array[Long], mapTaskId: Long): MapStatus = { + MapStatus(loc, uncompressedSizes, mapTaskId) + } +} diff --git a/spark/src/main/spark-4.0/org/apache/comet/shims/CometAggShim.scala b/spark/src/main/spark-4.0/org/apache/comet/shims/CometAggShim.scala new file mode 100644 index 0000000000..d34915389c --- /dev/null +++ b/spark/src/main/spark-4.0/org/apache/comet/shims/CometAggShim.scala @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.comet.shims + +import org.apache.spark.sql.catalyst.expressions.EvalMode +import org.apache.spark.sql.catalyst.expressions.aggregate.Sum + +object CometAggShim { + def getSumEvalMode(sum: Sum): EvalMode.Value = sum.evalContext.evalMode +} diff --git a/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala b/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala index fc3db183b3..21d18f1e1e 100644 --- a/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala +++ b/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala @@ -39,9 +39,7 @@ trait CometExprShim extends CommonStringExprs { CometEvalModeUtil.fromSparkEvalMode(c.evalMode) protected def binaryOutputStyle: BinaryOutputStyle = { - SQLConf.get - .getConf(SQLConf.BINARY_OUTPUT_STYLE) - .map(SQLConf.BinaryOutputStyle.withName) match { + SQLConf.get.getConf(SQLConf.BINARY_OUTPUT_STYLE) match { case Some(SQLConf.BinaryOutputStyle.UTF8) => BinaryOutputStyle.UTF8 case Some(SQLConf.BinaryOutputStyle.BASIC) => BinaryOutputStyle.BASIC case Some(SQLConf.BinaryOutputStyle.BASE64) => BinaryOutputStyle.BASE64 diff --git a/spark/src/main/spark-4.0/org/apache/spark/sql/comet/shims/ShimMapStatus.scala b/spark/src/main/spark-4.0/org/apache/spark/sql/comet/shims/ShimMapStatus.scala new file mode 100644 index 0000000000..f805a9b542 --- /dev/null +++ b/spark/src/main/spark-4.0/org/apache/spark/sql/comet/shims/ShimMapStatus.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.spark.sql.comet.shims + +import org.apache.spark.scheduler.MapStatus +import org.apache.spark.storage.BlockManagerId + +object ShimMapStatus { + def apply(loc: BlockManagerId, uncompressedSizes: Array[Long], mapTaskId: Long): MapStatus = { + MapStatus(loc, uncompressedSizes, mapTaskId, 0L) + } +} From 118ca281158066e9867ee7de3b464f19a914a053 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 22 Dec 2025 14:53:21 -0700 Subject: [PATCH 2/6] chore: rename spark-4.0 profile and shims directory to spark-4.1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- pom.xml | 6 +++--- spark/pom.xml | 4 ++-- .../org/apache/comet/shims/CometAggShim.scala | 0 .../org/apache/comet/shims/CometExprShim.scala | 0 .../apache/comet/shims/ShimCometBroadcastExchangeExec.scala | 0 .../apache/comet/shims/ShimCometShuffleExchangeExec.scala | 0 .../comet/shims/ShimCometSparkSessionExtensions.scala | 0 .../org/apache/comet/shims/ShimSQLConf.scala | 0 .../apache/spark/comet/shims/ShimCometDriverPlugin.scala | 0 .../apache/spark/sql/comet/shims/ShimCometScanExec.scala | 0 .../sql/comet/shims/ShimCometShuffleWriteProcessor.scala | 0 .../org/apache/spark/sql/comet/shims/ShimMapStatus.scala | 0 .../sql/comet/shims/ShimStreamSourceAwareSparkPlan.scala | 0 13 files changed, 5 insertions(+), 5 deletions(-) rename spark/src/main/{spark-4.0 => spark-4.1}/org/apache/comet/shims/CometAggShim.scala (100%) rename spark/src/main/{spark-4.0 => spark-4.1}/org/apache/comet/shims/CometExprShim.scala (100%) rename spark/src/main/{spark-4.0 => spark-4.1}/org/apache/comet/shims/ShimCometBroadcastExchangeExec.scala (100%) rename spark/src/main/{spark-4.0 => spark-4.1}/org/apache/comet/shims/ShimCometShuffleExchangeExec.scala (100%) rename spark/src/main/{spark-4.0 => spark-4.1}/org/apache/comet/shims/ShimCometSparkSessionExtensions.scala (100%) rename spark/src/main/{spark-4.0 => spark-4.1}/org/apache/comet/shims/ShimSQLConf.scala (100%) rename spark/src/main/{spark-4.0 => spark-4.1}/org/apache/spark/comet/shims/ShimCometDriverPlugin.scala (100%) rename spark/src/main/{spark-4.0 => spark-4.1}/org/apache/spark/sql/comet/shims/ShimCometScanExec.scala (100%) rename spark/src/main/{spark-4.0 => spark-4.1}/org/apache/spark/sql/comet/shims/ShimCometShuffleWriteProcessor.scala (100%) rename spark/src/main/{spark-4.0 => spark-4.1}/org/apache/spark/sql/comet/shims/ShimMapStatus.scala (100%) rename spark/src/main/{spark-4.0 => spark-4.1}/org/apache/spark/sql/comet/shims/ShimStreamSourceAwareSparkPlan.scala (100%) diff --git a/pom.xml b/pom.xml index 4e7848ae99..91c64e6f01 100644 --- a/pom.xml +++ b/pom.xml @@ -632,17 +632,17 @@ under the License. - spark-4.0 + spark-4.1 2.13.16 2.13 4.1.0 - 4.0 + 4.1 1.15.2 4.13.6 2.0.16 - spark-4.0 + spark-4.1 not-needed-yet 17 diff --git a/spark/pom.xml b/spark/pom.xml index 3b832e37a2..13f7772fe6 100644 --- a/spark/pom.xml +++ b/spark/pom.xml @@ -233,7 +233,7 @@ under the License. - spark-4.0 + spark-4.1 org.apache.iceberg @@ -241,7 +241,7 @@ under the License. 1.10.0 test - + org.eclipse.jetty jetty-server diff --git a/spark/src/main/spark-4.0/org/apache/comet/shims/CometAggShim.scala b/spark/src/main/spark-4.1/org/apache/comet/shims/CometAggShim.scala similarity index 100% rename from spark/src/main/spark-4.0/org/apache/comet/shims/CometAggShim.scala rename to spark/src/main/spark-4.1/org/apache/comet/shims/CometAggShim.scala diff --git a/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala b/spark/src/main/spark-4.1/org/apache/comet/shims/CometExprShim.scala similarity index 100% rename from spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala rename to spark/src/main/spark-4.1/org/apache/comet/shims/CometExprShim.scala diff --git a/spark/src/main/spark-4.0/org/apache/comet/shims/ShimCometBroadcastExchangeExec.scala b/spark/src/main/spark-4.1/org/apache/comet/shims/ShimCometBroadcastExchangeExec.scala similarity index 100% rename from spark/src/main/spark-4.0/org/apache/comet/shims/ShimCometBroadcastExchangeExec.scala rename to spark/src/main/spark-4.1/org/apache/comet/shims/ShimCometBroadcastExchangeExec.scala diff --git a/spark/src/main/spark-4.0/org/apache/comet/shims/ShimCometShuffleExchangeExec.scala b/spark/src/main/spark-4.1/org/apache/comet/shims/ShimCometShuffleExchangeExec.scala similarity index 100% rename from spark/src/main/spark-4.0/org/apache/comet/shims/ShimCometShuffleExchangeExec.scala rename to spark/src/main/spark-4.1/org/apache/comet/shims/ShimCometShuffleExchangeExec.scala diff --git a/spark/src/main/spark-4.0/org/apache/comet/shims/ShimCometSparkSessionExtensions.scala b/spark/src/main/spark-4.1/org/apache/comet/shims/ShimCometSparkSessionExtensions.scala similarity index 100% rename from spark/src/main/spark-4.0/org/apache/comet/shims/ShimCometSparkSessionExtensions.scala rename to spark/src/main/spark-4.1/org/apache/comet/shims/ShimCometSparkSessionExtensions.scala diff --git a/spark/src/main/spark-4.0/org/apache/comet/shims/ShimSQLConf.scala b/spark/src/main/spark-4.1/org/apache/comet/shims/ShimSQLConf.scala similarity index 100% rename from spark/src/main/spark-4.0/org/apache/comet/shims/ShimSQLConf.scala rename to spark/src/main/spark-4.1/org/apache/comet/shims/ShimSQLConf.scala diff --git a/spark/src/main/spark-4.0/org/apache/spark/comet/shims/ShimCometDriverPlugin.scala b/spark/src/main/spark-4.1/org/apache/spark/comet/shims/ShimCometDriverPlugin.scala similarity index 100% rename from spark/src/main/spark-4.0/org/apache/spark/comet/shims/ShimCometDriverPlugin.scala rename to spark/src/main/spark-4.1/org/apache/spark/comet/shims/ShimCometDriverPlugin.scala diff --git a/spark/src/main/spark-4.0/org/apache/spark/sql/comet/shims/ShimCometScanExec.scala b/spark/src/main/spark-4.1/org/apache/spark/sql/comet/shims/ShimCometScanExec.scala similarity index 100% rename from spark/src/main/spark-4.0/org/apache/spark/sql/comet/shims/ShimCometScanExec.scala rename to spark/src/main/spark-4.1/org/apache/spark/sql/comet/shims/ShimCometScanExec.scala diff --git a/spark/src/main/spark-4.0/org/apache/spark/sql/comet/shims/ShimCometShuffleWriteProcessor.scala b/spark/src/main/spark-4.1/org/apache/spark/sql/comet/shims/ShimCometShuffleWriteProcessor.scala similarity index 100% rename from spark/src/main/spark-4.0/org/apache/spark/sql/comet/shims/ShimCometShuffleWriteProcessor.scala rename to spark/src/main/spark-4.1/org/apache/spark/sql/comet/shims/ShimCometShuffleWriteProcessor.scala diff --git a/spark/src/main/spark-4.0/org/apache/spark/sql/comet/shims/ShimMapStatus.scala b/spark/src/main/spark-4.1/org/apache/spark/sql/comet/shims/ShimMapStatus.scala similarity index 100% rename from spark/src/main/spark-4.0/org/apache/spark/sql/comet/shims/ShimMapStatus.scala rename to spark/src/main/spark-4.1/org/apache/spark/sql/comet/shims/ShimMapStatus.scala diff --git a/spark/src/main/spark-4.0/org/apache/spark/sql/comet/shims/ShimStreamSourceAwareSparkPlan.scala b/spark/src/main/spark-4.1/org/apache/spark/sql/comet/shims/ShimStreamSourceAwareSparkPlan.scala similarity index 100% rename from spark/src/main/spark-4.0/org/apache/spark/sql/comet/shims/ShimStreamSourceAwareSparkPlan.scala rename to spark/src/main/spark-4.1/org/apache/spark/sql/comet/shims/ShimStreamSourceAwareSparkPlan.scala From fdca3161a71727b6bd62bd60f2ce20fc950f1be0 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 22 Dec 2025 14:54:50 -0700 Subject: [PATCH 3/6] chore: update spark short version 4.0 to 4.1 in spark_sql_test workflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .github/workflows/spark_sql_test.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/spark_sql_test.yml b/.github/workflows/spark_sql_test.yml index 09b85515ea..cdfb233e97 100644 --- a/.github/workflows/spark_sql_test.yml +++ b/.github/workflows/spark_sql_test.yml @@ -50,7 +50,7 @@ jobs: strategy: matrix: os: [ubuntu-24.04] - spark-version: [{short: '3.4', full: '3.4.3', java: 11}, {short: '3.5', full: '3.5.7', java: 11}, {short: '4.0', full: '4.1.0', java: 17}] + spark-version: [{short: '3.4', full: '3.4.3', java: 11}, {short: '3.5', full: '3.5.7', java: 11}, {short: '4.1', full: '4.1.0', java: 17}] module: - {name: "catalyst", args1: "catalyst/test", args2: ""} - {name: "sql_core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest} @@ -59,9 +59,9 @@ jobs: - {name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"} - {name: "sql_hive-2", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.ExtendedHiveTest"} - {name: "sql_hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest"} - # Skip sql_hive-1 for Spark 4.0 due to https://github.com/apache/datafusion-comet/issues/2946 + # Skip sql_hive-1 for Spark 4.1 due to https://github.com/apache/datafusion-comet/issues/2946 exclude: - - spark-version: {short: '4.0', full: '4.1.0', java: 17} + - spark-version: {short: '4.1', full: '4.1.0', java: 17} module: {name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"} fail-fast: false name: spark-sql-${{ matrix.module.name }}/${{ matrix.os }}/spark-${{ matrix.spark-version.full }}/java-${{ matrix.spark-version.java }} From c4e7bbe3dab6767c18943cad3ec6f9b5a1db18e3 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 22 Dec 2025 16:01:45 -0700 Subject: [PATCH 4/6] fix --- spark/src/test/scala/org/apache/comet/CometCastSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala index a7bd6febf8..c20b4ef824 100644 --- a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala @@ -31,9 +31,9 @@ import org.apache.spark.sql.catalyst.expressions.Cast import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.functions.col import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.{ArrayType, BooleanType, ByteType, DataType, DataTypes, DecimalType, IntegerType, LongType, ShortType, StringType, StructField, StructType} +import org.apache.spark.sql.types.{ArrayType, BooleanType, ByteType, DataType, DataTypes, DecimalType, IntegerType, LongType, ShortType, StringType, StructField, StructType}import org.apache.comet.expressions.{CometCast, CometEvalMode} -import org.apache.comet.expressions.{CometCast, CometEvalMode} +import org.apache.comet.CometSparkSessionExtensions.isSpark40Plus import org.apache.comet.rules.CometScanTypeChecker import org.apache.comet.serde.Compatible From d1be4feee4bdeac3549016555f5006cc486538a0 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 22 Dec 2025 16:05:09 -0700 Subject: [PATCH 5/6] rename diff --- dev/diffs/{4.0.1.diff => 4.1.0.diff} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename dev/diffs/{4.0.1.diff => 4.1.0.diff} (100%) diff --git a/dev/diffs/4.0.1.diff b/dev/diffs/4.1.0.diff similarity index 100% rename from dev/diffs/4.0.1.diff rename to dev/diffs/4.1.0.diff From e1d5297038a79997ce05ace0a0f3b82561c3fcd3 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 22 Dec 2025 16:20:31 -0700 Subject: [PATCH 6/6] fix diff --- dev/diffs/4.1.0.diff | 379 +++++++++++++++++++++---------------------- 1 file changed, 184 insertions(+), 195 deletions(-) diff --git a/dev/diffs/4.1.0.diff b/dev/diffs/4.1.0.diff index a9315db005..241c864683 100644 --- a/dev/diffs/4.1.0.diff +++ b/dev/diffs/4.1.0.diff @@ -1,8 +1,8 @@ diff --git a/pom.xml b/pom.xml -index 22922143fc3..477d4ec4194 100644 +index 1824a28614b..0d0ce6d27ce 100644 --- a/pom.xml +++ b/pom.xml -@@ -148,6 +148,8 @@ +@@ -152,6 +152,8 @@ 4.0.3 2.5.3 2.0.8 @@ -11,7 +11,7 @@ index 22922143fc3..477d4ec4194 100644