From 89735b9b75660cafb3980f002f9a3bbc4cbdcc97 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 28 Apr 2026 18:29:49 -0600 Subject: [PATCH 1/6] build: change default Maven profile to Spark 4.0 / Scala 2.13 Update the default build configuration from Spark 3.5 / Scala 2.12 to Spark 4.0 / Scala 2.13. The spark-3.4 and spark-3.5 profiles now explicitly set scala.binary.version, shims.majorVerSrc, and semanticdb.version since those defaults have changed. The scala-2.12 profile is populated and scala-2.13 is now empty (matching defaults). Also updates Dockerfile, Docker publish workflow, and all documentation to reflect the new defaults. Co-Authored-By: Claude Opus 4.6 --- .github/workflows/docker-publish.yml | 2 +- .../contributor-guide/benchmarking_aws_ec2.md | 2 +- .../contributor-guide/benchmarking_macos.md | 12 +++--- .../benchmarking_spark_sql_perf.md | 6 +-- docs/source/contributor-guide/debugging.md | 2 +- .../contributor-guide/iceberg-spark-tests.md | 2 +- docs/source/user-guide/latest/datasources.md | 8 ++-- docs/source/user-guide/latest/iceberg.md | 4 +- docs/source/user-guide/latest/installation.md | 4 +- docs/source/user-guide/latest/kubernetes.md | 14 +++---- docs/source/user-guide/latest/source.md | 10 ++--- kube/Dockerfile | 16 ++++---- pom.xml | 37 ++++++++++--------- 13 files changed, 61 insertions(+), 58 deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 63b9d0cd37..790cafa5f5 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -74,6 +74,6 @@ jobs: with: platforms: linux/amd64,linux/arm64 push: true - tags: ghcr.io/apache/datafusion-comet:spark-3.5-scala-2.12-${{ env.COMET_VERSION }} + tags: ghcr.io/apache/datafusion-comet:spark-4.0-scala-2.13-${{ env.COMET_VERSION }} file: kube/Dockerfile no-cache: true diff --git a/docs/source/contributor-guide/benchmarking_aws_ec2.md b/docs/source/contributor-guide/benchmarking_aws_ec2.md index 81f15d64ea..bc02d2bf79 100644 --- a/docs/source/contributor-guide/benchmarking_aws_ec2.md +++ b/docs/source/contributor-guide/benchmarking_aws_ec2.md @@ -104,7 +104,7 @@ make release Set `COMET_JAR` environment variable. ```shell -export COMET_JAR=/home/ec2-user/datafusion-comet/spark/target/comet-spark-spark3.5_2.12-$COMET_VERSION.jar +export COMET_JAR=/home/ec2-user/datafusion-comet/spark/target/comet-spark-spark4.0_2.13-$COMET_VERSION.jar ``` ## Run Benchmarks diff --git a/docs/source/contributor-guide/benchmarking_macos.md b/docs/source/contributor-guide/benchmarking_macos.md index e75261e8d5..20ec0f6f11 100644 --- a/docs/source/contributor-guide/benchmarking_macos.md +++ b/docs/source/contributor-guide/benchmarking_macos.md @@ -55,13 +55,13 @@ export DF_BENCH=`pwd` ## Install Spark -Install Apache Spark. This example refers to 3.5.4 version. +Install Apache Spark. This example refers to 4.0.2 version. ```shell -wget https://archive.apache.org/dist/spark/spark-3.5.4/spark-3.5.4-bin-hadoop3.tgz -tar xzf spark-3.5.4-bin-hadoop3.tgz -sudo mv spark-3.5.4-bin-hadoop3 /opt -export SPARK_HOME=/opt/spark-3.5.4-bin-hadoop3/ +wget https://archive.apache.org/dist/spark/spark-4.0.2/spark-4.0.2-bin-hadoop3.tgz +tar xzf spark-4.0.2-bin-hadoop3.tgz +sudo mv spark-4.0.2-bin-hadoop3 /opt +export SPARK_HOME=/opt/spark-4.0.2-bin-hadoop3/ ``` Start Spark in standalone mode: @@ -129,7 +129,7 @@ make release COMET_FEATURES=mimalloc Set `COMET_JAR` to point to the location of the Comet jar file. 
Example for Comet 0.8 ```shell -export COMET_JAR=`pwd`/spark/target/comet-spark-spark3.5_2.12-0.8.0-SNAPSHOT.jar +export COMET_JAR=`pwd`/spark/target/comet-spark-spark4.0_2.13-0.8.0-SNAPSHOT.jar ``` Run the following command (the `--data` parameter will need to be updated to point to your S3 bucket): diff --git a/docs/source/contributor-guide/benchmarking_spark_sql_perf.md b/docs/source/contributor-guide/benchmarking_spark_sql_perf.md index 538539759c..b9af6469b7 100644 --- a/docs/source/contributor-guide/benchmarking_spark_sql_perf.md +++ b/docs/source/contributor-guide/benchmarking_spark_sql_perf.md @@ -34,8 +34,8 @@ partitioning and writing to Parquet format automatically. ## Prerequisites -- Java 17 (for Spark 3.5+) -- Apache Spark 3.5.x +- Java 17 +- Apache Spark 4.0.x - SBT (Scala Build Tool) - C compiler toolchain (`gcc`, `make`, `flex`, `bison`, `byacc`) @@ -225,7 +225,7 @@ Build Comet from source and launch `spark-shell` with both the Comet and spark-s ```shell make release -export COMET_JAR=$(pwd)/spark/target/comet-spark-spark3.5_2.12-*.jar +export COMET_JAR=$(pwd)/spark/target/comet-spark-spark4.0_2.13-*.jar $SPARK_HOME/bin/spark-shell \ --master $SPARK_MASTER \ diff --git a/docs/source/contributor-guide/debugging.md b/docs/source/contributor-guide/debugging.md index 3356a83893..2c9909c910 100644 --- a/docs/source/contributor-guide/debugging.md +++ b/docs/source/contributor-guide/debugging.md @@ -136,7 +136,7 @@ make release COMET_FEATURES=backtrace Set `RUST_BACKTRACE=1` for the Spark worker/executor process, or for `spark-submit` if running in local mode. ```console -RUST_BACKTRACE=1 $SPARK_HOME/spark-shell --jars spark/target/comet-spark-spark3.5_2.12-$COMET_VERSION.jar --conf spark.plugins=org.apache.spark.CometPlugin --conf spark.comet.enabled=true --conf spark.comet.exec.enabled=true +RUST_BACKTRACE=1 $SPARK_HOME/spark-shell --jars spark/target/comet-spark-spark4.0_2.13-$COMET_VERSION.jar --conf spark.plugins=org.apache.spark.CometPlugin --conf spark.comet.enabled=true --conf spark.comet.exec.enabled=true ``` Get the expanded exception details diff --git a/docs/source/contributor-guide/iceberg-spark-tests.md b/docs/source/contributor-guide/iceberg-spark-tests.md index 38becc0208..cd73ff089f 100644 --- a/docs/source/contributor-guide/iceberg-spark-tests.md +++ b/docs/source/contributor-guide/iceberg-spark-tests.md @@ -40,7 +40,7 @@ Here is an overview of the changes that the diffs make to Iceberg: Run `make release` in Comet to install the Comet JAR into the local Maven repository, specifying the Spark version. ```shell -PROFILES="-Pspark-3.5" make release +PROFILES="-Pspark-4.0" make release ``` ## 2. Clone Iceberg and Apply Diff diff --git a/docs/source/user-guide/latest/datasources.md b/docs/source/user-guide/latest/datasources.md index b79831d804..ef1e343cb5 100644 --- a/docs/source/user-guide/latest/datasources.md +++ b/docs/source/user-guide/latest/datasources.md @@ -69,12 +69,12 @@ Unlike to native Comet reader the Datafusion reader fully supports nested types To build Comet with native DataFusion reader and remote HDFS support it is required to have a JDK installed Example: -Build a Comet for `spark-3.5` provide a JDK path in `JAVA_HOME` +Build a Comet for `spark-4.0` provide a JDK path in `JAVA_HOME` Provide the JRE linker path in `RUSTFLAGS`, the path can vary depending on the system. 
Typically JRE linker is a part of installed JDK ```shell -export JAVA_HOME="/opt/homebrew/opt/openjdk@11" -make release PROFILES="-Pspark-3.5" COMET_FEATURES=hdfs RUSTFLAGS="-L $JAVA_HOME/libexec/openjdk.jdk/Contents/Home/lib/server" +export JAVA_HOME="/opt/homebrew/opt/openjdk@17" +make release PROFILES="-Pspark-4.0" COMET_FEATURES=hdfs RUSTFLAGS="-L $JAVA_HOME/libexec/openjdk.jdk/Contents/Home/lib/server" ``` Start Comet with experimental reader and HDFS support as [described](installation.md/#run-spark-shell-with-comet-enabled) @@ -149,7 +149,7 @@ docker compose -f kube/local/hdfs-docker-compose.yml up - Build a project with HDFS support ```shell -JAVA_HOME="/opt/homebrew/opt/openjdk@11" make release PROFILES="-Pspark-3.5" COMET_FEATURES=hdfs RUSTFLAGS="-L /opt/homebrew/opt/openjdk@11/libexec/openjdk.jdk/Contents/Home/lib/server" +JAVA_HOME="/opt/homebrew/opt/openjdk@17" make release PROFILES="-Pspark-4.0" COMET_FEATURES=hdfs RUSTFLAGS="-L /opt/homebrew/opt/openjdk@17/libexec/openjdk.jdk/Contents/Home/lib/server" ``` - Run local test diff --git a/docs/source/user-guide/latest/iceberg.md b/docs/source/user-guide/latest/iceberg.md index 24a4bda057..12418b9545 100644 --- a/docs/source/user-guide/latest/iceberg.md +++ b/docs/source/user-guide/latest/iceberg.md @@ -31,7 +31,7 @@ reader is enabled by default. To disable it, set `spark.comet.scan.icebergNative ```shell $SPARK_HOME/bin/spark-shell \ - --packages org.apache.datafusion:comet-spark-spark3.5_2.12:0.14.0,org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.8.1,org.apache.iceberg:iceberg-core:1.8.1 \ + --packages org.apache.datafusion:comet-spark-spark4.0_2.13:0.14.0,org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.8.1,org.apache.iceberg:iceberg-core:1.8.1 \ --repositories https://repo1.maven.org/maven2/ \ --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ --conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkCatalog \ @@ -106,7 +106,7 @@ configure Spark to use a REST catalog with Comet's native Iceberg scan: ```shell $SPARK_HOME/bin/spark-shell \ - --packages org.apache.datafusion:comet-spark-spark3.5_2.12:0.14.0,org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.8.1,org.apache.iceberg:iceberg-core:1.8.1 \ + --packages org.apache.datafusion:comet-spark-spark4.0_2.13:0.14.0,org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.8.1,org.apache.iceberg:iceberg-core:1.8.1 \ --repositories https://repo1.maven.org/maven2/ \ --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ --conf spark.sql.catalog.rest_cat=org.apache.iceberg.spark.SparkCatalog \ diff --git a/docs/source/user-guide/latest/installation.md b/docs/source/user-guide/latest/installation.md index be7c853f7a..75f94ab016 100644 --- a/docs/source/user-guide/latest/installation.md +++ b/docs/source/user-guide/latest/installation.md @@ -100,7 +100,7 @@ See the [Comet Kubernetes Guide](kubernetes.md) guide. Make sure `SPARK_HOME` points to the same Spark version as Comet was built for. ```shell -export COMET_JAR=spark/target/comet-spark-spark3.5_2.12-$COMET_VERSION.jar +export COMET_JAR=spark/target/comet-spark-spark4.0_2.13-$COMET_VERSION.jar $SPARK_HOME/bin/spark-shell \ --jars $COMET_JAR \ @@ -156,7 +156,7 @@ explicitly contain Comet otherwise Spark may use a different class-loader for th components which will then fail at runtime. 
For example: ``` ---driver-class-path spark/target/comet-spark-spark3.5_2.12-$COMET_VERSION.jar +--driver-class-path spark/target/comet-spark-spark4.0_2.13-$COMET_VERSION.jar ``` Some cluster managers may require additional configuration, see diff --git a/docs/source/user-guide/latest/kubernetes.md b/docs/source/user-guide/latest/kubernetes.md index 2fb037d630..718da49d32 100644 --- a/docs/source/user-guide/latest/kubernetes.md +++ b/docs/source/user-guide/latest/kubernetes.md @@ -69,30 +69,30 @@ metadata: spec: type: Scala mode: cluster - image: apache/datafusion-comet:0.7.0-spark3.5.5-scala2.12-java11 + image: apache/datafusion-comet:0.7.0-spark4.0.2-scala2.13-java17 imagePullPolicy: IfNotPresent mainClass: org.apache.spark.examples.SparkPi - mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.12-3.5.5.jar + mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.13-4.0.2.jar sparkConf: - "spark.executor.extraClassPath": "/opt/spark/jars/comet-spark-spark3.5_2.12-0.7.0.jar" - "spark.driver.extraClassPath": "/opt/spark/jars/comet-spark-spark3.5_2.12-0.7.0.jar" + "spark.executor.extraClassPath": "/opt/spark/jars/comet-spark-spark4.0_2.13-0.7.0.jar" + "spark.driver.extraClassPath": "/opt/spark/jars/comet-spark-spark4.0_2.13-0.7.0.jar" "spark.plugins": "org.apache.spark.CometPlugin" "spark.comet.enabled": "true" "spark.comet.exec.enabled": "true" "spark.comet.exec.shuffle.enabled": "true" "spark.comet.exec.shuffle.mode": "auto" "spark.shuffle.manager": "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager" - sparkVersion: 3.5.6 + sparkVersion: 4.0.2 driver: labels: - version: 3.5.6 + version: 4.0.2 cores: 1 coreLimit: 1200m memory: 512m serviceAccount: spark-operator-spark executor: labels: - version: 3.5.6 + version: 4.0.2 instances: 1 cores: 1 coreLimit: 1200m diff --git a/docs/source/user-guide/latest/source.md b/docs/source/user-guide/latest/source.md index eb56e1f21b..5ad33ecfe1 100644 --- a/docs/source/user-guide/latest/source.md +++ b/docs/source/user-guide/latest/source.md @@ -38,7 +38,7 @@ cd apache-datafusion-comet-$COMET_VERSION Build ```console -make release-nogit PROFILES="-Pspark-3.5" +make release-nogit PROFILES="-Pspark-4.0" ``` ## Building from the GitHub repository @@ -53,17 +53,17 @@ Build Comet for a specific Spark version: ```console cd datafusion-comet -make release PROFILES="-Pspark-3.5" +make release PROFILES="-Pspark-4.0" ``` -Note that the project builds for Scala 2.12 by default but can be built for Scala 2.13 using an additional profile: +Note that the project builds for Scala 2.13 by default but can be built for Scala 2.12 using an additional profile: ```console -make release PROFILES="-Pspark-3.5 -Pscala-2.13" +make release PROFILES="-Pspark-3.5 -Pscala-2.12" ``` To build Comet from the source distribution on an isolated environment without an access to `github.com` it is necessary to disable `git-commit-id-maven-plugin`, otherwise you will face errors that there is no access to the git during the build process. In that case you may use: ```console -make release-nogit PROFILES="-Pspark-3.5" +make release-nogit PROFILES="-Pspark-4.0" ``` diff --git a/kube/Dockerfile b/kube/Dockerfile index 699aeeb210..a078277407 100644 --- a/kube/Dockerfile +++ b/kube/Dockerfile @@ -15,14 +15,14 @@ # limitations under the License. 
# -FROM apache/spark:3.5.8 AS builder +FROM apache/spark:4.0.2 AS builder USER root -# Installing JDK11 as the image comes with JRE +# Installing JDK17 as the image comes with JRE RUN apt update \ && apt install -y curl \ - && apt install -y openjdk-11-jdk \ + && apt install -y openjdk-17-jdk \ && apt clean RUN apt install -y gcc-10 g++-10 cpp-10 unzip @@ -37,8 +37,8 @@ ENV PATH="$PATH:/root/.local/bin" RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y ENV PATH="/root/.cargo/bin:${PATH}" ENV RUSTFLAGS="-C debuginfo=line-tables-only -C incremental=false" -ENV SPARK_VERSION=3.5 -ENV SCALA_VERSION=2.12 +ENV SPARK_VERSION=4.0 +ENV SCALA_VERSION=2.13 # copy source files to Docker image RUN mkdir /comet @@ -70,9 +70,9 @@ RUN mkdir -p /root/.m2 && \ RUN cd /comet \ && JAVA_HOME=$(readlink -f $(which javac) | sed "s/\/bin\/javac//") make release-nogit PROFILES="-Pspark-$SPARK_VERSION -Pscala-$SCALA_VERSION" -FROM apache/spark:3.5.8 -ENV SPARK_VERSION=3.5 -ENV SCALA_VERSION=2.12 +FROM apache/spark:4.0.2 +ENV SPARK_VERSION=4.0 +ENV SCALA_VERSION=2.13 USER root # note the use of a wildcard in the file name so that this works with both snapshot and final release versions diff --git a/pom.xml b/pom.xml index b83a6fd45b..c08cf0c1a3 100644 --- a/pom.xml +++ b/pom.xml @@ -65,24 +65,24 @@ under the License. 1.7.0 3.6.1 0.16.1 - 2.12.18 - 2.12 + 2.13.16 + 2.13 4.9.6 3.2.16 2.2.0 - 3.5.8 - 3.5 + 4.0.2 + 4.0 provided 3.25.5 - 1.13.1 + 1.15.2 provided 3.3.4 18.3.0 1.9.13 2.43.0 0.8.11 - 4.8.8 - 2.0.7 + 4.13.6 + 2.0.16 33.2.1-jre 1.21.0 2.31.51 @@ -116,8 +116,8 @@ under the License. -Djdk.reflect.useDirectMethodHandle=false -ea -Xmx4g -Xss4m ${extraJavaTestArgs} - spark-3.x - spark-3.5 + spark-4.x + spark-4.0 @@ -635,10 +635,13 @@ under the License. spark-3.4 2.12.17 + 2.12 3.4.3 3.4 1.13.1 + 4.8.8 2.0.6 + spark-3.x spark-3.4 11 ${java.version} @@ -650,10 +653,13 @@ under the License. spark-3.5 2.12.18 + 2.12 3.5.8 3.5 1.13.1 + 4.8.8 2.0.7 + spark-3.x spark-3.5 11 ${java.version} @@ -662,10 +668,8 @@ under the License. - spark-4.0 - 2.13.16 2.13 4.0.2 @@ -675,7 +679,6 @@ under the License. 2.0.16 spark-4.x spark-4.0 - 17 ${java.version} ${java.version} @@ -729,15 +732,15 @@ under the License. scala-2.12 + + 2.12.18 + 2.12 + 4.8.8 + scala-2.13 - - 2.13.16 - 2.13 - 4.13.6 - From db67718f04f08c6160a5e2156786462d3b704e4d Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 28 Apr 2026 19:18:01 -0600 Subject: [PATCH 2/6] fix: restore scala-2.13 profile properties and update TPC-DS/TPC-H JDK to 17 The scala-2.13 profile must retain its properties so that `-Pspark-3.x -Pscala-2.13` correctly overrides the Spark profile's scala.binary.version=2.12. Without this, Iceberg CI builds produce _2.12 artifacts when _2.13 is expected. The TPC-DS/TPC-H verification jobs used JDK 11 with no explicit Spark profile, so they now inherit the Spark 4.0 default which requires JDK 17. 
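An illustrative way to confirm the override (not part of this patch) is to
build with the combined profiles and check the artifact suffix, following the
naming conventions used in the repo docs:

```shell
# Hypothetical verification: with -Pscala-2.13 layered over a Spark 3.x
# profile, the release jar must carry the _2.13 suffix rather than _2.12.
PROFILES="-Pspark-3.5 -Pscala-2.13" make release
ls spark/target/comet-spark-spark3.5_2.13-*.jar
```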
Co-Authored-By: Claude Opus 4.6 --- .github/workflows/pr_build_linux.yml | 4 ++-- pom.xml | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pr_build_linux.yml b/.github/workflows/pr_build_linux.yml index a8d925b1c2..f57a95c0f7 100644 --- a/.github/workflows/pr_build_linux.yml +++ b/.github/workflows/pr_build_linux.yml @@ -444,7 +444,7 @@ jobs: uses: ./.github/actions/setup-builder with: rust-version: ${{ env.RUST_VERSION }} - jdk-version: 11 + jdk-version: 17 - name: Download native library uses: actions/download-artifact@v8 @@ -502,7 +502,7 @@ jobs: uses: ./.github/actions/setup-builder with: rust-version: ${{ env.RUST_VERSION }} - jdk-version: 11 + jdk-version: 17 - name: Download native library uses: actions/download-artifact@v8 diff --git a/pom.xml b/pom.xml index c08cf0c1a3..5199c8a453 100644 --- a/pom.xml +++ b/pom.xml @@ -741,6 +741,11 @@ under the License. scala-2.13 + + 2.13.16 + 2.13 + 4.13.6 + From b1536947aeaa4e4fba695013e0b4e8fad764eeed Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 28 Apr 2026 19:39:04 -0600 Subject: [PATCH 3/6] fix: move activeByDefault from spark-3.5 to spark-4.0 in spark/pom.xml The spark/pom.xml Iceberg dependency profiles use activeByDefault to provide the right Iceberg version when no -Pspark-* is passed. Since the default is now Spark 4.0, the activeByDefault must be on the spark-4.0 profile (Iceberg 1.10.0) rather than spark-3.5 (Iceberg 1.8.1), otherwise Maven resolves the non-existent artifact iceberg-spark-runtime-4.0_2.13:1.8.1. Co-Authored-By: Claude Opus 4.6 --- spark/pom.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spark/pom.xml b/spark/pom.xml index 722cebade3..fc0fc5c99a 100644 --- a/spark/pom.xml +++ b/spark/pom.xml @@ -223,9 +223,6 @@ under the License. spark-3.5 - - true - org.apache.iceberg @@ -251,6 +248,9 @@ under the License. spark-4.0 + + true + org.apache.iceberg From c740430c8edc1d23c2c16752ea22c30315755239 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 6 May 2026 08:36:50 -0600 Subject: [PATCH 4/6] build: bump default Maven profile from Spark 4.0 to Spark 4.1 Updates root pom.xml defaults to Spark 4.1.1, Scala 2.13.17, Parquet 1.16.0, slf4j 2.0.17, and shim sources to spark-4.1. Moves activeByDefault from spark-4.0 to spark-4.1 in spark/pom.xml. Bumps kube/Dockerfile to apache/spark:4.1.1, the docker-publish image tag, and all docs and example commands that the previous default-bump PR had updated to spark4.0_2.13. 
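A local smoke test of the bumped Dockerfile (illustrative only; the tag below
is a placeholder that follows the docker-publish workflow's naming scheme):

```shell
# Hypothetical build from the repo root, since kube/Dockerfile copies the
# source tree into the image; the -dev tag is invented for illustration.
docker build -f kube/Dockerfile \
  -t ghcr.io/apache/datafusion-comet:spark-4.1-scala-2.13-dev .
```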
--- .github/workflows/docker-publish.yml | 2 +- .../contributor-guide/benchmarking_aws_ec2.md | 2 +- .../source/contributor-guide/benchmarking_macos.md | 12 ++++++------ .../benchmarking_spark_sql_perf.md | 4 ++-- docs/source/contributor-guide/debugging.md | 2 +- .../contributor-guide/iceberg-spark-tests.md | 2 +- docs/source/user-guide/latest/datasources.md | 6 +++--- docs/source/user-guide/latest/iceberg.md | 4 ++-- docs/source/user-guide/latest/installation.md | 6 +++--- docs/source/user-guide/latest/kubernetes.md | 14 +++++++------- docs/source/user-guide/latest/source.md | 6 +++--- kube/Dockerfile | 8 ++++---- pom.xml | 14 ++++++-------- spark/pom.xml | 6 +++--- 14 files changed, 43 insertions(+), 45 deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 790cafa5f5..e3bae668c6 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -74,6 +74,6 @@ jobs: with: platforms: linux/amd64,linux/arm64 push: true - tags: ghcr.io/apache/datafusion-comet:spark-4.0-scala-2.13-${{ env.COMET_VERSION }} + tags: ghcr.io/apache/datafusion-comet:spark-4.1-scala-2.13-${{ env.COMET_VERSION }} file: kube/Dockerfile no-cache: true diff --git a/docs/source/contributor-guide/benchmarking_aws_ec2.md b/docs/source/contributor-guide/benchmarking_aws_ec2.md index bc02d2bf79..bb92a8958f 100644 --- a/docs/source/contributor-guide/benchmarking_aws_ec2.md +++ b/docs/source/contributor-guide/benchmarking_aws_ec2.md @@ -104,7 +104,7 @@ make release Set `COMET_JAR` environment variable. ```shell -export COMET_JAR=/home/ec2-user/datafusion-comet/spark/target/comet-spark-spark4.0_2.13-$COMET_VERSION.jar +export COMET_JAR=/home/ec2-user/datafusion-comet/spark/target/comet-spark-spark4.1_2.13-$COMET_VERSION.jar ``` ## Run Benchmarks diff --git a/docs/source/contributor-guide/benchmarking_macos.md b/docs/source/contributor-guide/benchmarking_macos.md index 20ec0f6f11..4c37a32a26 100644 --- a/docs/source/contributor-guide/benchmarking_macos.md +++ b/docs/source/contributor-guide/benchmarking_macos.md @@ -55,13 +55,13 @@ export DF_BENCH=`pwd` ## Install Spark -Install Apache Spark. This example refers to 4.0.2 version. +Install Apache Spark. This example refers to 4.1.1 version. ```shell -wget https://archive.apache.org/dist/spark/spark-4.0.2/spark-4.0.2-bin-hadoop3.tgz -tar xzf spark-4.0.2-bin-hadoop3.tgz -sudo mv spark-4.0.2-bin-hadoop3 /opt -export SPARK_HOME=/opt/spark-4.0.2-bin-hadoop3/ +wget https://archive.apache.org/dist/spark/spark-4.1.1/spark-4.1.1-bin-hadoop3.tgz +tar xzf spark-4.1.1-bin-hadoop3.tgz +sudo mv spark-4.1.1-bin-hadoop3 /opt +export SPARK_HOME=/opt/spark-4.1.1-bin-hadoop3/ ``` Start Spark in standalone mode: @@ -129,7 +129,7 @@ make release COMET_FEATURES=mimalloc Set `COMET_JAR` to point to the location of the Comet jar file. Example for Comet 0.8 ```shell -export COMET_JAR=`pwd`/spark/target/comet-spark-spark4.0_2.13-0.8.0-SNAPSHOT.jar +export COMET_JAR=`pwd`/spark/target/comet-spark-spark4.1_2.13-0.8.0-SNAPSHOT.jar ``` Run the following command (the `--data` parameter will need to be updated to point to your S3 bucket): diff --git a/docs/source/contributor-guide/benchmarking_spark_sql_perf.md b/docs/source/contributor-guide/benchmarking_spark_sql_perf.md index b9af6469b7..d893319443 100644 --- a/docs/source/contributor-guide/benchmarking_spark_sql_perf.md +++ b/docs/source/contributor-guide/benchmarking_spark_sql_perf.md @@ -35,7 +35,7 @@ partitioning and writing to Parquet format automatically. 
## Prerequisites - Java 17 -- Apache Spark 4.0.x +- Apache Spark 4.1.x - SBT (Scala Build Tool) - C compiler toolchain (`gcc`, `make`, `flex`, `bison`, `byacc`) @@ -225,7 +225,7 @@ Build Comet from source and launch `spark-shell` with both the Comet and spark-s ```shell make release -export COMET_JAR=$(pwd)/spark/target/comet-spark-spark4.0_2.13-*.jar +export COMET_JAR=$(pwd)/spark/target/comet-spark-spark4.1_2.13-*.jar $SPARK_HOME/bin/spark-shell \ --master $SPARK_MASTER \ diff --git a/docs/source/contributor-guide/debugging.md b/docs/source/contributor-guide/debugging.md index 2c9909c910..e5372d922d 100644 --- a/docs/source/contributor-guide/debugging.md +++ b/docs/source/contributor-guide/debugging.md @@ -136,7 +136,7 @@ make release COMET_FEATURES=backtrace Set `RUST_BACKTRACE=1` for the Spark worker/executor process, or for `spark-submit` if running in local mode. ```console -RUST_BACKTRACE=1 $SPARK_HOME/spark-shell --jars spark/target/comet-spark-spark4.0_2.13-$COMET_VERSION.jar --conf spark.plugins=org.apache.spark.CometPlugin --conf spark.comet.enabled=true --conf spark.comet.exec.enabled=true +RUST_BACKTRACE=1 $SPARK_HOME/spark-shell --jars spark/target/comet-spark-spark4.1_2.13-$COMET_VERSION.jar --conf spark.plugins=org.apache.spark.CometPlugin --conf spark.comet.enabled=true --conf spark.comet.exec.enabled=true ``` Get the expanded exception details diff --git a/docs/source/contributor-guide/iceberg-spark-tests.md b/docs/source/contributor-guide/iceberg-spark-tests.md index cd73ff089f..f37ee4f1f3 100644 --- a/docs/source/contributor-guide/iceberg-spark-tests.md +++ b/docs/source/contributor-guide/iceberg-spark-tests.md @@ -40,7 +40,7 @@ Here is an overview of the changes that the diffs make to Iceberg: Run `make release` in Comet to install the Comet JAR into the local Maven repository, specifying the Spark version. ```shell -PROFILES="-Pspark-4.0" make release +PROFILES="-Pspark-4.1" make release ``` ## 2. Clone Iceberg and Apply Diff diff --git a/docs/source/user-guide/latest/datasources.md b/docs/source/user-guide/latest/datasources.md index ef1e343cb5..5fb47fa9a9 100644 --- a/docs/source/user-guide/latest/datasources.md +++ b/docs/source/user-guide/latest/datasources.md @@ -69,12 +69,12 @@ Unlike to native Comet reader the Datafusion reader fully supports nested types To build Comet with native DataFusion reader and remote HDFS support it is required to have a JDK installed Example: -Build a Comet for `spark-4.0` provide a JDK path in `JAVA_HOME` +Build a Comet for `spark-4.1` provide a JDK path in `JAVA_HOME` Provide the JRE linker path in `RUSTFLAGS`, the path can vary depending on the system. 
Typically JRE linker is a part of installed JDK ```shell export JAVA_HOME="/opt/homebrew/opt/openjdk@17" -make release PROFILES="-Pspark-4.0" COMET_FEATURES=hdfs RUSTFLAGS="-L $JAVA_HOME/libexec/openjdk.jdk/Contents/Home/lib/server" +make release PROFILES="-Pspark-4.1" COMET_FEATURES=hdfs RUSTFLAGS="-L $JAVA_HOME/libexec/openjdk.jdk/Contents/Home/lib/server" ``` Start Comet with experimental reader and HDFS support as [described](installation.md/#run-spark-shell-with-comet-enabled) @@ -149,7 +149,7 @@ docker compose -f kube/local/hdfs-docker-compose.yml up - Build a project with HDFS support ```shell -JAVA_HOME="/opt/homebrew/opt/openjdk@17" make release PROFILES="-Pspark-4.0" COMET_FEATURES=hdfs RUSTFLAGS="-L /opt/homebrew/opt/openjdk@17/libexec/openjdk.jdk/Contents/Home/lib/server" +JAVA_HOME="/opt/homebrew/opt/openjdk@17" make release PROFILES="-Pspark-4.1" COMET_FEATURES=hdfs RUSTFLAGS="-L /opt/homebrew/opt/openjdk@17/libexec/openjdk.jdk/Contents/Home/lib/server" ``` - Run local test diff --git a/docs/source/user-guide/latest/iceberg.md b/docs/source/user-guide/latest/iceberg.md index 12418b9545..cb6fdab2c9 100644 --- a/docs/source/user-guide/latest/iceberg.md +++ b/docs/source/user-guide/latest/iceberg.md @@ -31,7 +31,7 @@ reader is enabled by default. To disable it, set `spark.comet.scan.icebergNative ```shell $SPARK_HOME/bin/spark-shell \ - --packages org.apache.datafusion:comet-spark-spark4.0_2.13:0.14.0,org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.8.1,org.apache.iceberg:iceberg-core:1.8.1 \ + --packages org.apache.datafusion:comet-spark-spark4.1_2.13:0.14.0,org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.8.1,org.apache.iceberg:iceberg-core:1.8.1 \ --repositories https://repo1.maven.org/maven2/ \ --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ --conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkCatalog \ @@ -106,7 +106,7 @@ configure Spark to use a REST catalog with Comet's native Iceberg scan: ```shell $SPARK_HOME/bin/spark-shell \ - --packages org.apache.datafusion:comet-spark-spark4.0_2.13:0.14.0,org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.8.1,org.apache.iceberg:iceberg-core:1.8.1 \ + --packages org.apache.datafusion:comet-spark-spark4.1_2.13:0.14.0,org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.8.1,org.apache.iceberg:iceberg-core:1.8.1 \ --repositories https://repo1.maven.org/maven2/ \ --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ --conf spark.sql.catalog.rest_cat=org.apache.iceberg.spark.SparkCatalog \ diff --git a/docs/source/user-guide/latest/installation.md b/docs/source/user-guide/latest/installation.md index 50bd430f10..4a6521b575 100644 --- a/docs/source/user-guide/latest/installation.md +++ b/docs/source/user-guide/latest/installation.md @@ -85,7 +85,7 @@ Here are the direct links for downloading the Comet $COMET_VERSION jar file. 
- [Comet plugin for Spark 3.5 / Scala 2.12](https://repo1.maven.org/maven2/org/apache/datafusion/comet-spark-spark3.5_2.12/$COMET_VERSION/comet-spark-spark3.5_2.12-$COMET_VERSION.jar) - [Comet plugin for Spark 3.5 / Scala 2.13](https://repo1.maven.org/maven2/org/apache/datafusion/comet-spark-spark3.5_2.13/$COMET_VERSION/comet-spark-spark3.5_2.13-$COMET_VERSION.jar) - [Comet plugin for Spark 4.0 / Scala 2.13](https://repo1.maven.org/maven2/org/apache/datafusion/comet-spark-spark4.0_2.13/$COMET_VERSION/comet-spark-spark4.0_2.13-$COMET_VERSION.jar) -- [Comet plugin for Spark 4.1 / Scala 2.13 (Experimental)](https://repo1.maven.org/maven2/org/apache/datafusion/comet-spark-spark4.1_2.13/$COMET_VERSION/comet-spark-spark4.1_2.13-$COMET_VERSION.jar) +- [Comet plugin for Spark 4.1 / Scala 2.13](https://repo1.maven.org/maven2/org/apache/datafusion/comet-spark-spark4.1_2.13/$COMET_VERSION/comet-spark-spark4.1_2.13-$COMET_VERSION.jar) - [Comet plugin for Spark 4.2 / Scala 2.13 (Experimental)](https://repo1.maven.org/maven2/org/apache/datafusion/comet-spark-spark4.2_2.13/$COMET_VERSION/comet-spark-spark4.2_2.13-$COMET_VERSION.jar) @@ -105,7 +105,7 @@ See the [Comet Kubernetes Guide](kubernetes.md) guide. Make sure `SPARK_HOME` points to the same Spark version as Comet was built for. ```shell -export COMET_JAR=spark/target/comet-spark-spark4.0_2.13-$COMET_VERSION.jar +export COMET_JAR=spark/target/comet-spark-spark4.1_2.13-$COMET_VERSION.jar $SPARK_HOME/bin/spark-shell \ --jars $COMET_JAR \ @@ -161,7 +161,7 @@ explicitly contain Comet otherwise Spark may use a different class-loader for th components which will then fail at runtime. For example: ``` ---driver-class-path spark/target/comet-spark-spark4.0_2.13-$COMET_VERSION.jar +--driver-class-path spark/target/comet-spark-spark4.1_2.13-$COMET_VERSION.jar ``` Some cluster managers may require additional configuration, see diff --git a/docs/source/user-guide/latest/kubernetes.md b/docs/source/user-guide/latest/kubernetes.md index 718da49d32..fd84b7ad9b 100644 --- a/docs/source/user-guide/latest/kubernetes.md +++ b/docs/source/user-guide/latest/kubernetes.md @@ -69,30 +69,30 @@ metadata: spec: type: Scala mode: cluster - image: apache/datafusion-comet:0.7.0-spark4.0.2-scala2.13-java17 + image: apache/datafusion-comet:0.7.0-spark4.1.1-scala2.13-java17 imagePullPolicy: IfNotPresent mainClass: org.apache.spark.examples.SparkPi - mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.13-4.0.2.jar + mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.13-4.1.1.jar sparkConf: - "spark.executor.extraClassPath": "/opt/spark/jars/comet-spark-spark4.0_2.13-0.7.0.jar" - "spark.driver.extraClassPath": "/opt/spark/jars/comet-spark-spark4.0_2.13-0.7.0.jar" + "spark.executor.extraClassPath": "/opt/spark/jars/comet-spark-spark4.1_2.13-0.7.0.jar" + "spark.driver.extraClassPath": "/opt/spark/jars/comet-spark-spark4.1_2.13-0.7.0.jar" "spark.plugins": "org.apache.spark.CometPlugin" "spark.comet.enabled": "true" "spark.comet.exec.enabled": "true" "spark.comet.exec.shuffle.enabled": "true" "spark.comet.exec.shuffle.mode": "auto" "spark.shuffle.manager": "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager" - sparkVersion: 4.0.2 + sparkVersion: 4.1.1 driver: labels: - version: 4.0.2 + version: 4.1.1 cores: 1 coreLimit: 1200m memory: 512m serviceAccount: spark-operator-spark executor: labels: - version: 4.0.2 + version: 4.1.1 instances: 1 cores: 1 coreLimit: 1200m diff --git a/docs/source/user-guide/latest/source.md 
b/docs/source/user-guide/latest/source.md index 5ad33ecfe1..6ae43be56a 100644 --- a/docs/source/user-guide/latest/source.md +++ b/docs/source/user-guide/latest/source.md @@ -38,7 +38,7 @@ cd apache-datafusion-comet-$COMET_VERSION Build ```console -make release-nogit PROFILES="-Pspark-4.0" +make release-nogit PROFILES="-Pspark-4.1" ``` ## Building from the GitHub repository @@ -53,7 +53,7 @@ Build Comet for a specific Spark version: ```console cd datafusion-comet -make release PROFILES="-Pspark-4.0" +make release PROFILES="-Pspark-4.1" ``` Note that the project builds for Scala 2.13 by default but can be built for Scala 2.12 using an additional profile: @@ -65,5 +65,5 @@ make release PROFILES="-Pspark-3.5 -Pscala-2.12" To build Comet from the source distribution on an isolated environment without an access to `github.com` it is necessary to disable `git-commit-id-maven-plugin`, otherwise you will face errors that there is no access to the git during the build process. In that case you may use: ```console -make release-nogit PROFILES="-Pspark-4.0" +make release-nogit PROFILES="-Pspark-4.1" ``` diff --git a/kube/Dockerfile b/kube/Dockerfile index a078277407..d13b401a01 100644 --- a/kube/Dockerfile +++ b/kube/Dockerfile @@ -15,7 +15,7 @@ # limitations under the License. # -FROM apache/spark:4.0.2 AS builder +FROM apache/spark:4.1.1 AS builder USER root @@ -37,7 +37,7 @@ ENV PATH="$PATH:/root/.local/bin" RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y ENV PATH="/root/.cargo/bin:${PATH}" ENV RUSTFLAGS="-C debuginfo=line-tables-only -C incremental=false" -ENV SPARK_VERSION=4.0 +ENV SPARK_VERSION=4.1 ENV SCALA_VERSION=2.13 # copy source files to Docker image @@ -70,8 +70,8 @@ RUN mkdir -p /root/.m2 && \ RUN cd /comet \ && JAVA_HOME=$(readlink -f $(which javac) | sed "s/\/bin\/javac//") make release-nogit PROFILES="-Pspark-$SPARK_VERSION -Pscala-$SCALA_VERSION" -FROM apache/spark:4.0.2 -ENV SPARK_VERSION=4.0 +FROM apache/spark:4.1.1 +ENV SPARK_VERSION=4.1 ENV SCALA_VERSION=2.13 USER root diff --git a/pom.xml b/pom.xml index 5199c8a453..f378bee7a8 100644 --- a/pom.xml +++ b/pom.xml @@ -65,16 +65,16 @@ under the License. 1.7.0 3.6.1 0.16.1 - 2.13.16 + 2.13.17 2.13 4.9.6 3.2.16 2.2.0 - 4.0.2 - 4.0 + 4.1.1 + 4.1 provided 3.25.5 - 1.15.2 + 1.16.0 provided 3.3.4 18.3.0 @@ -82,7 +82,7 @@ under the License. 2.43.0 0.8.11 4.13.6 - 2.0.16 + 2.0.17 33.2.1-jre 1.21.0 2.31.51 @@ -117,7 +117,7 @@ under the License. -ea -Xmx4g -Xss4m ${extraJavaTestArgs} spark-4.x - spark-4.0 + spark-4.1 @@ -686,7 +686,6 @@ under the License. - spark-4.1 17 ${java.version} ${java.version} diff --git a/spark/pom.xml b/spark/pom.xml index fc0fc5c99a..f36a132efc 100644 --- a/spark/pom.xml +++ b/spark/pom.xml @@ -248,9 +248,6 @@ under the License. spark-4.0 - - true - org.apache.iceberg @@ -275,6 +272,9 @@ under the License. spark-4.1 + + true + From 7f0b1eb3bf831961d7a3f437c0da300068353e99 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 6 May 2026 16:46:39 -0600 Subject: [PATCH 5/6] ci: add JAVA_TOOL_OPTIONS to TPC-H/TPC-DS verify jobs for JDK 17 The TPC-H data generation step launches Spark via `mvnw exec:java`, which falls outside surefire's argLine, so the --add-opens flags from pom.xml do not apply. With this PR bumping these jobs from JDK 11 to JDK 17, GenTPCHData hangs in shuffle when Kryo reflectively probes java.nio.ByteBuffer.hb. Set JAVA_TOOL_OPTIONS at the job level so both exec:java and surefire forks get the required flags. 
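For illustration (not part of the patch): JAVA_TOOL_OPTIONS is read by every
HotSpot JVM at startup, which is why it reaches the Spark driver launched via
`mvnw exec:java` as well as surefire's forked test JVMs:

```shell
# Any JVM started in this environment inherits the flags and confirms it
# by printing "Picked up JAVA_TOOL_OPTIONS: ..." on stderr.
export JAVA_TOOL_OPTIONS="--add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED"
java -version
```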
--- .github/workflows/pr_build_linux.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/pr_build_linux.yml b/.github/workflows/pr_build_linux.yml index b8b025f0db..437ba49dda 100644 --- a/.github/workflows/pr_build_linux.yml +++ b/.github/workflows/pr_build_linux.yml @@ -451,6 +451,8 @@ jobs: runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a+m7a+c8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion-comet', github.run_id) || 'ubuntu-latest' }} container: image: amd64/rust + env: + JAVA_TOOL_OPTIONS: --add-exports=java.base/sun.nio.ch=ALL-UNNAMED --add-exports=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED steps: - uses: runs-on/action@742bf56072eb4845a0f94b3394673e4903c90ff0 # v2.1.0 @@ -505,6 +507,8 @@ jobs: runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a+m7a+c8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion-comet', github.run_id) || 'ubuntu-latest' }} container: image: amd64/rust + env: + JAVA_TOOL_OPTIONS: --add-exports=java.base/sun.nio.ch=ALL-UNNAMED --add-exports=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED strategy: matrix: join: [sort_merge, broadcast, hash] From 185b69c342c6d4dd81cef914b3e1ddf541055eee Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 6 May 2026 17:53:23 -0600 Subject: [PATCH 6/6] ci: expand JAVA_TOOL_OPTIONS to cover java.lang.invoke for TPC-H/TPC-DS jobs The previous flag set only opened java.lang and java.nio, but with the default profile now Spark 4.1 / Scala 2.13 the TPC-H GenTPCHData run trips on SerializedLambda.capturingClass and dies with InaccessibleObjectException for java.lang.invoke. Mirror the full list from pom.xml's extraJavaTestArgs so any other reflection-driven access into java.base also has the matching --add-opens grant. 
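To keep the workflow value from drifting out of sync with the pom, the list
can be regenerated instead of hand-copied; a sketch, assuming a recent
maven-help-plugin:

```shell
# Resolve extraJavaTestArgs from the root pom and flatten it onto one line;
# the --add-exports/--add-opens entries are the ones mirrored into the
# JAVA_TOOL_OPTIONS value below.
./mvnw help:evaluate -Dexpression=extraJavaTestArgs -q -DforceStdout | xargs
```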
--- .github/workflows/pr_build_linux.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pr_build_linux.yml b/.github/workflows/pr_build_linux.yml index 437ba49dda..2826aeeecc 100644 --- a/.github/workflows/pr_build_linux.yml +++ b/.github/workflows/pr_build_linux.yml @@ -452,7 +452,7 @@ jobs: container: image: amd64/rust env: - JAVA_TOOL_OPTIONS: --add-exports=java.base/sun.nio.ch=ALL-UNNAMED --add-exports=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED + JAVA_TOOL_OPTIONS: --add-exports=java.base/sun.nio.ch=ALL-UNNAMED --add-exports=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED steps: - uses: runs-on/action@742bf56072eb4845a0f94b3394673e4903c90ff0 # v2.1.0 @@ -508,7 +508,7 @@ jobs: container: image: amd64/rust env: - JAVA_TOOL_OPTIONS: --add-exports=java.base/sun.nio.ch=ALL-UNNAMED --add-exports=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED + JAVA_TOOL_OPTIONS: --add-exports=java.base/sun.nio.ch=ALL-UNNAMED --add-exports=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED strategy: matrix: join: [sort_merge, broadcast, hash]
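With the series applied, the new no-profile defaults can be spot-checked; a
sketch, assuming a recent maven-help-plugin and the property names described
in the commit messages above:

```shell
# With no -P flags these should now resolve to 4.1.1 and 2.13, matching
# the root pom.xml defaults introduced by this series.
./mvnw help:evaluate -Dexpression=spark.version -q -DforceStdout
./mvnw help:evaluate -Dexpression=scala.binary.version -q -DforceStdout
```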