From 89735b9b75660cafb3980f002f9a3bbc4cbdcc97 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 28 Apr 2026 18:29:49 -0600 Subject: [PATCH 1/6] build: change default Maven profile to Spark 4.0 / Scala 2.13 Update the default build configuration from Spark 3.5 / Scala 2.12 to Spark 4.0 / Scala 2.13. The spark-3.4 and spark-3.5 profiles now explicitly set scala.binary.version, shims.majorVerSrc, and semanticdb.version since those defaults have changed. The scala-2.12 profile is populated and scala-2.13 is now empty (matching defaults). Also updates Dockerfile, Docker publish workflow, and all documentation to reflect the new defaults. Co-Authored-By: Claude Opus 4.6 --- .github/workflows/docker-publish.yml | 2 +- .../contributor-guide/benchmarking_aws_ec2.md | 2 +- .../contributor-guide/benchmarking_macos.md | 12 +++--- .../benchmarking_spark_sql_perf.md | 6 +-- docs/source/contributor-guide/debugging.md | 2 +- .../contributor-guide/iceberg-spark-tests.md | 2 +- docs/source/user-guide/latest/datasources.md | 8 ++-- docs/source/user-guide/latest/iceberg.md | 4 +- docs/source/user-guide/latest/installation.md | 4 +- docs/source/user-guide/latest/kubernetes.md | 14 +++---- docs/source/user-guide/latest/source.md | 10 ++--- kube/Dockerfile | 16 ++++---- pom.xml | 37 ++++++++++--------- 13 files changed, 61 insertions(+), 58 deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 63b9d0cd37..790cafa5f5 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -74,6 +74,6 @@ jobs: with: platforms: linux/amd64,linux/arm64 push: true - tags: ghcr.io/apache/datafusion-comet:spark-3.5-scala-2.12-${{ env.COMET_VERSION }} + tags: ghcr.io/apache/datafusion-comet:spark-4.0-scala-2.13-${{ env.COMET_VERSION }} file: kube/Dockerfile no-cache: true diff --git a/docs/source/contributor-guide/benchmarking_aws_ec2.md b/docs/source/contributor-guide/benchmarking_aws_ec2.md index 81f15d64ea..bc02d2bf79 100644 --- a/docs/source/contributor-guide/benchmarking_aws_ec2.md +++ b/docs/source/contributor-guide/benchmarking_aws_ec2.md @@ -104,7 +104,7 @@ make release Set `COMET_JAR` environment variable. ```shell -export COMET_JAR=/home/ec2-user/datafusion-comet/spark/target/comet-spark-spark3.5_2.12-$COMET_VERSION.jar +export COMET_JAR=/home/ec2-user/datafusion-comet/spark/target/comet-spark-spark4.0_2.13-$COMET_VERSION.jar ``` ## Run Benchmarks diff --git a/docs/source/contributor-guide/benchmarking_macos.md b/docs/source/contributor-guide/benchmarking_macos.md index e75261e8d5..20ec0f6f11 100644 --- a/docs/source/contributor-guide/benchmarking_macos.md +++ b/docs/source/contributor-guide/benchmarking_macos.md @@ -55,13 +55,13 @@ export DF_BENCH=`pwd` ## Install Spark -Install Apache Spark. This example refers to 3.5.4 version. +Install Apache Spark. This example refers to 4.0.2 version. ```shell -wget https://archive.apache.org/dist/spark/spark-3.5.4/spark-3.5.4-bin-hadoop3.tgz -tar xzf spark-3.5.4-bin-hadoop3.tgz -sudo mv spark-3.5.4-bin-hadoop3 /opt -export SPARK_HOME=/opt/spark-3.5.4-bin-hadoop3/ +wget https://archive.apache.org/dist/spark/spark-4.0.2/spark-4.0.2-bin-hadoop3.tgz +tar xzf spark-4.0.2-bin-hadoop3.tgz +sudo mv spark-4.0.2-bin-hadoop3 /opt +export SPARK_HOME=/opt/spark-4.0.2-bin-hadoop3/ ``` Start Spark in standalone mode: @@ -129,7 +129,7 @@ make release COMET_FEATURES=mimalloc Set `COMET_JAR` to point to the location of the Comet jar file. 
Example for Comet 0.8 ```shell -export COMET_JAR=`pwd`/spark/target/comet-spark-spark3.5_2.12-0.8.0-SNAPSHOT.jar +export COMET_JAR=`pwd`/spark/target/comet-spark-spark4.0_2.13-0.8.0-SNAPSHOT.jar ``` Run the following command (the `--data` parameter will need to be updated to point to your S3 bucket): diff --git a/docs/source/contributor-guide/benchmarking_spark_sql_perf.md b/docs/source/contributor-guide/benchmarking_spark_sql_perf.md index 538539759c..b9af6469b7 100644 --- a/docs/source/contributor-guide/benchmarking_spark_sql_perf.md +++ b/docs/source/contributor-guide/benchmarking_spark_sql_perf.md @@ -34,8 +34,8 @@ partitioning and writing to Parquet format automatically. ## Prerequisites -- Java 17 (for Spark 3.5+) -- Apache Spark 3.5.x +- Java 17 +- Apache Spark 4.0.x - SBT (Scala Build Tool) - C compiler toolchain (`gcc`, `make`, `flex`, `bison`, `byacc`) @@ -225,7 +225,7 @@ Build Comet from source and launch `spark-shell` with both the Comet and spark-s ```shell make release -export COMET_JAR=$(pwd)/spark/target/comet-spark-spark3.5_2.12-*.jar +export COMET_JAR=$(pwd)/spark/target/comet-spark-spark4.0_2.13-*.jar $SPARK_HOME/bin/spark-shell \ --master $SPARK_MASTER \ diff --git a/docs/source/contributor-guide/debugging.md b/docs/source/contributor-guide/debugging.md index 3356a83893..2c9909c910 100644 --- a/docs/source/contributor-guide/debugging.md +++ b/docs/source/contributor-guide/debugging.md @@ -136,7 +136,7 @@ make release COMET_FEATURES=backtrace Set `RUST_BACKTRACE=1` for the Spark worker/executor process, or for `spark-submit` if running in local mode. ```console -RUST_BACKTRACE=1 $SPARK_HOME/spark-shell --jars spark/target/comet-spark-spark3.5_2.12-$COMET_VERSION.jar --conf spark.plugins=org.apache.spark.CometPlugin --conf spark.comet.enabled=true --conf spark.comet.exec.enabled=true +RUST_BACKTRACE=1 $SPARK_HOME/spark-shell --jars spark/target/comet-spark-spark4.0_2.13-$COMET_VERSION.jar --conf spark.plugins=org.apache.spark.CometPlugin --conf spark.comet.enabled=true --conf spark.comet.exec.enabled=true ``` Get the expanded exception details diff --git a/docs/source/contributor-guide/iceberg-spark-tests.md b/docs/source/contributor-guide/iceberg-spark-tests.md index 38becc0208..cd73ff089f 100644 --- a/docs/source/contributor-guide/iceberg-spark-tests.md +++ b/docs/source/contributor-guide/iceberg-spark-tests.md @@ -40,7 +40,7 @@ Here is an overview of the changes that the diffs make to Iceberg: Run `make release` in Comet to install the Comet JAR into the local Maven repository, specifying the Spark version. ```shell -PROFILES="-Pspark-3.5" make release +PROFILES="-Pspark-4.0" make release ``` ## 2. Clone Iceberg and Apply Diff diff --git a/docs/source/user-guide/latest/datasources.md b/docs/source/user-guide/latest/datasources.md index b79831d804..ef1e343cb5 100644 --- a/docs/source/user-guide/latest/datasources.md +++ b/docs/source/user-guide/latest/datasources.md @@ -69,12 +69,12 @@ Unlike to native Comet reader the Datafusion reader fully supports nested types To build Comet with native DataFusion reader and remote HDFS support it is required to have a JDK installed Example: -Build a Comet for `spark-3.5` provide a JDK path in `JAVA_HOME` +Build a Comet for `spark-4.0` provide a JDK path in `JAVA_HOME` Provide the JRE linker path in `RUSTFLAGS`, the path can vary depending on the system. 
Typically JRE linker is a part of installed JDK ```shell -export JAVA_HOME="/opt/homebrew/opt/openjdk@11" -make release PROFILES="-Pspark-3.5" COMET_FEATURES=hdfs RUSTFLAGS="-L $JAVA_HOME/libexec/openjdk.jdk/Contents/Home/lib/server" +export JAVA_HOME="/opt/homebrew/opt/openjdk@17" +make release PROFILES="-Pspark-4.0" COMET_FEATURES=hdfs RUSTFLAGS="-L $JAVA_HOME/libexec/openjdk.jdk/Contents/Home/lib/server" ``` Start Comet with experimental reader and HDFS support as [described](installation.md/#run-spark-shell-with-comet-enabled) @@ -149,7 +149,7 @@ docker compose -f kube/local/hdfs-docker-compose.yml up - Build a project with HDFS support ```shell -JAVA_HOME="/opt/homebrew/opt/openjdk@11" make release PROFILES="-Pspark-3.5" COMET_FEATURES=hdfs RUSTFLAGS="-L /opt/homebrew/opt/openjdk@11/libexec/openjdk.jdk/Contents/Home/lib/server" +JAVA_HOME="/opt/homebrew/opt/openjdk@17" make release PROFILES="-Pspark-4.0" COMET_FEATURES=hdfs RUSTFLAGS="-L /opt/homebrew/opt/openjdk@17/libexec/openjdk.jdk/Contents/Home/lib/server" ``` - Run local test diff --git a/docs/source/user-guide/latest/iceberg.md b/docs/source/user-guide/latest/iceberg.md index 24a4bda057..12418b9545 100644 --- a/docs/source/user-guide/latest/iceberg.md +++ b/docs/source/user-guide/latest/iceberg.md @@ -31,7 +31,7 @@ reader is enabled by default. To disable it, set `spark.comet.scan.icebergNative ```shell $SPARK_HOME/bin/spark-shell \ - --packages org.apache.datafusion:comet-spark-spark3.5_2.12:0.14.0,org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.8.1,org.apache.iceberg:iceberg-core:1.8.1 \ + --packages org.apache.datafusion:comet-spark-spark4.0_2.13:0.14.0,org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.8.1,org.apache.iceberg:iceberg-core:1.8.1 \ --repositories https://repo1.maven.org/maven2/ \ --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ --conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkCatalog \ @@ -106,7 +106,7 @@ configure Spark to use a REST catalog with Comet's native Iceberg scan: ```shell $SPARK_HOME/bin/spark-shell \ - --packages org.apache.datafusion:comet-spark-spark3.5_2.12:0.14.0,org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.8.1,org.apache.iceberg:iceberg-core:1.8.1 \ + --packages org.apache.datafusion:comet-spark-spark4.0_2.13:0.14.0,org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.8.1,org.apache.iceberg:iceberg-core:1.8.1 \ --repositories https://repo1.maven.org/maven2/ \ --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ --conf spark.sql.catalog.rest_cat=org.apache.iceberg.spark.SparkCatalog \ diff --git a/docs/source/user-guide/latest/installation.md b/docs/source/user-guide/latest/installation.md index be7c853f7a..75f94ab016 100644 --- a/docs/source/user-guide/latest/installation.md +++ b/docs/source/user-guide/latest/installation.md @@ -100,7 +100,7 @@ See the [Comet Kubernetes Guide](kubernetes.md) guide. Make sure `SPARK_HOME` points to the same Spark version as Comet was built for. ```shell -export COMET_JAR=spark/target/comet-spark-spark3.5_2.12-$COMET_VERSION.jar +export COMET_JAR=spark/target/comet-spark-spark4.0_2.13-$COMET_VERSION.jar $SPARK_HOME/bin/spark-shell \ --jars $COMET_JAR \ @@ -156,7 +156,7 @@ explicitly contain Comet otherwise Spark may use a different class-loader for th components which will then fail at runtime. 
For example: ``` ---driver-class-path spark/target/comet-spark-spark3.5_2.12-$COMET_VERSION.jar +--driver-class-path spark/target/comet-spark-spark4.0_2.13-$COMET_VERSION.jar ``` Some cluster managers may require additional configuration, see diff --git a/docs/source/user-guide/latest/kubernetes.md b/docs/source/user-guide/latest/kubernetes.md index 2fb037d630..718da49d32 100644 --- a/docs/source/user-guide/latest/kubernetes.md +++ b/docs/source/user-guide/latest/kubernetes.md @@ -69,30 +69,30 @@ metadata: spec: type: Scala mode: cluster - image: apache/datafusion-comet:0.7.0-spark3.5.5-scala2.12-java11 + image: apache/datafusion-comet:0.7.0-spark4.0.2-scala2.13-java17 imagePullPolicy: IfNotPresent mainClass: org.apache.spark.examples.SparkPi - mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.12-3.5.5.jar + mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.13-4.0.2.jar sparkConf: - "spark.executor.extraClassPath": "/opt/spark/jars/comet-spark-spark3.5_2.12-0.7.0.jar" - "spark.driver.extraClassPath": "/opt/spark/jars/comet-spark-spark3.5_2.12-0.7.0.jar" + "spark.executor.extraClassPath": "/opt/spark/jars/comet-spark-spark4.0_2.13-0.7.0.jar" + "spark.driver.extraClassPath": "/opt/spark/jars/comet-spark-spark4.0_2.13-0.7.0.jar" "spark.plugins": "org.apache.spark.CometPlugin" "spark.comet.enabled": "true" "spark.comet.exec.enabled": "true" "spark.comet.exec.shuffle.enabled": "true" "spark.comet.exec.shuffle.mode": "auto" "spark.shuffle.manager": "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager" - sparkVersion: 3.5.6 + sparkVersion: 4.0.2 driver: labels: - version: 3.5.6 + version: 4.0.2 cores: 1 coreLimit: 1200m memory: 512m serviceAccount: spark-operator-spark executor: labels: - version: 3.5.6 + version: 4.0.2 instances: 1 cores: 1 coreLimit: 1200m diff --git a/docs/source/user-guide/latest/source.md b/docs/source/user-guide/latest/source.md index eb56e1f21b..5ad33ecfe1 100644 --- a/docs/source/user-guide/latest/source.md +++ b/docs/source/user-guide/latest/source.md @@ -38,7 +38,7 @@ cd apache-datafusion-comet-$COMET_VERSION Build ```console -make release-nogit PROFILES="-Pspark-3.5" +make release-nogit PROFILES="-Pspark-4.0" ``` ## Building from the GitHub repository @@ -53,17 +53,17 @@ Build Comet for a specific Spark version: ```console cd datafusion-comet -make release PROFILES="-Pspark-3.5" +make release PROFILES="-Pspark-4.0" ``` -Note that the project builds for Scala 2.12 by default but can be built for Scala 2.13 using an additional profile: +Note that the project builds for Scala 2.13 by default but can be built for Scala 2.12 using an additional profile: ```console -make release PROFILES="-Pspark-3.5 -Pscala-2.13" +make release PROFILES="-Pspark-3.5 -Pscala-2.12" ``` To build Comet from the source distribution on an isolated environment without an access to `github.com` it is necessary to disable `git-commit-id-maven-plugin`, otherwise you will face errors that there is no access to the git during the build process. In that case you may use: ```console -make release-nogit PROFILES="-Pspark-3.5" +make release-nogit PROFILES="-Pspark-4.0" ``` diff --git a/kube/Dockerfile b/kube/Dockerfile index 699aeeb210..a078277407 100644 --- a/kube/Dockerfile +++ b/kube/Dockerfile @@ -15,14 +15,14 @@ # limitations under the License. 
# -FROM apache/spark:3.5.8 AS builder +FROM apache/spark:4.0.2 AS builder USER root -# Installing JDK11 as the image comes with JRE +# Installing JDK17 as the image comes with JRE RUN apt update \ && apt install -y curl \ - && apt install -y openjdk-11-jdk \ + && apt install -y openjdk-17-jdk \ && apt clean RUN apt install -y gcc-10 g++-10 cpp-10 unzip @@ -37,8 +37,8 @@ ENV PATH="$PATH:/root/.local/bin" RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y ENV PATH="/root/.cargo/bin:${PATH}" ENV RUSTFLAGS="-C debuginfo=line-tables-only -C incremental=false" -ENV SPARK_VERSION=3.5 -ENV SCALA_VERSION=2.12 +ENV SPARK_VERSION=4.0 +ENV SCALA_VERSION=2.13 # copy source files to Docker image RUN mkdir /comet @@ -70,9 +70,9 @@ RUN mkdir -p /root/.m2 && \ RUN cd /comet \ && JAVA_HOME=$(readlink -f $(which javac) | sed "s/\/bin\/javac//") make release-nogit PROFILES="-Pspark-$SPARK_VERSION -Pscala-$SCALA_VERSION" -FROM apache/spark:3.5.8 -ENV SPARK_VERSION=3.5 -ENV SCALA_VERSION=2.12 +FROM apache/spark:4.0.2 +ENV SPARK_VERSION=4.0 +ENV SCALA_VERSION=2.13 USER root # note the use of a wildcard in the file name so that this works with both snapshot and final release versions diff --git a/pom.xml b/pom.xml index b83a6fd45b..c08cf0c1a3 100644 --- a/pom.xml +++ b/pom.xml @@ -65,24 +65,24 @@ under the License. 1.7.0 3.6.1 0.16.1 - 2.12.18 - 2.12 + 2.13.16 + 2.13 4.9.6 3.2.16 2.2.0 - 3.5.8 - 3.5 + 4.0.2 + 4.0 provided 3.25.5 - 1.13.1 + 1.15.2 provided 3.3.4 18.3.0 1.9.13 2.43.0 0.8.11 - 4.8.8 - 2.0.7 + 4.13.6 + 2.0.16 33.2.1-jre 1.21.0 2.31.51 @@ -116,8 +116,8 @@ under the License. -Djdk.reflect.useDirectMethodHandle=false -ea -Xmx4g -Xss4m ${extraJavaTestArgs} - spark-3.x - spark-3.5 + spark-4.x + spark-4.0 @@ -635,10 +635,13 @@ under the License. spark-3.4 2.12.17 + 2.12 3.4.3 3.4 1.13.1 + 4.8.8 2.0.6 + spark-3.x spark-3.4 11 ${java.version} @@ -650,10 +653,13 @@ under the License. spark-3.5 2.12.18 + 2.12 3.5.8 3.5 1.13.1 + 4.8.8 2.0.7 + spark-3.x spark-3.5 11 ${java.version} @@ -662,10 +668,8 @@ under the License. - spark-4.0 - 2.13.16 2.13 4.0.2 @@ -675,7 +679,6 @@ under the License. 2.0.16 spark-4.x spark-4.0 - 17 ${java.version} ${java.version} @@ -729,15 +732,15 @@ under the License. scala-2.12 + + 2.12.18 + 2.12 + 4.8.8 + scala-2.13 - - 2.13.16 - 2.13 - 4.13.6 - From db67718f04f08c6160a5e2156786462d3b704e4d Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 28 Apr 2026 19:18:01 -0600 Subject: [PATCH 2/6] fix: restore scala-2.13 profile properties and update TPC-DS/TPC-H JDK to 17 The scala-2.13 profile must retain its properties so that `-Pspark-3.x -Pscala-2.13` correctly overrides the Spark profile's scala.binary.version=2.12. Without this, Iceberg CI builds produce _2.12 artifacts when _2.13 is expected. The TPC-DS/TPC-H verification jobs used JDK 11 with no explicit Spark profile, so they now inherit the Spark 4.0 default which requires JDK 17. 
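An illustrative way to confirm the override (not part of this patch) is to
build with the combined profiles and check the artifact suffix, following the
naming conventions used in the repo docs:

```shell
# Hypothetical verification: with -Pscala-2.13 layered over a Spark 3.x
# profile, the release jar must carry the _2.13 suffix rather than _2.12.
PROFILES="-Pspark-3.5 -Pscala-2.13" make release
ls spark/target/comet-spark-spark3.5_2.13-*.jar
```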
Co-Authored-By: Claude Opus 4.6 --- .github/workflows/pr_build_linux.yml | 4 ++-- pom.xml | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pr_build_linux.yml b/.github/workflows/pr_build_linux.yml index a8d925b1c2..f57a95c0f7 100644 --- a/.github/workflows/pr_build_linux.yml +++ b/.github/workflows/pr_build_linux.yml @@ -444,7 +444,7 @@ jobs: uses: ./.github/actions/setup-builder with: rust-version: ${{ env.RUST_VERSION }} - jdk-version: 11 + jdk-version: 17 - name: Download native library uses: actions/download-artifact@v8 @@ -502,7 +502,7 @@ jobs: uses: ./.github/actions/setup-builder with: rust-version: ${{ env.RUST_VERSION }} - jdk-version: 11 + jdk-version: 17 - name: Download native library uses: actions/download-artifact@v8 diff --git a/pom.xml b/pom.xml index c08cf0c1a3..5199c8a453 100644 --- a/pom.xml +++ b/pom.xml @@ -741,6 +741,11 @@ under the License. scala-2.13 + + 2.13.16 + 2.13 + 4.13.6 + From b1536947aeaa4e4fba695013e0b4e8fad764eeed Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 28 Apr 2026 19:39:04 -0600 Subject: [PATCH 3/6] fix: move activeByDefault from spark-3.5 to spark-4.0 in spark/pom.xml The spark/pom.xml Iceberg dependency profiles use activeByDefault to provide the right Iceberg version when no -Pspark-* is passed. Since the default is now Spark 4.0, the activeByDefault must be on the spark-4.0 profile (Iceberg 1.10.0) rather than spark-3.5 (Iceberg 1.8.1), otherwise Maven resolves the non-existent artifact iceberg-spark-runtime-4.0_2.13:1.8.1. Co-Authored-By: Claude Opus 4.6 --- spark/pom.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spark/pom.xml b/spark/pom.xml index 722cebade3..fc0fc5c99a 100644 --- a/spark/pom.xml +++ b/spark/pom.xml @@ -223,9 +223,6 @@ under the License. spark-3.5 - - true - org.apache.iceberg @@ -251,6 +248,9 @@ under the License. spark-4.0 + + true + org.apache.iceberg From c740430c8edc1d23c2c16752ea22c30315755239 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 6 May 2026 08:36:50 -0600 Subject: [PATCH 4/6] build: bump default Maven profile from Spark 4.0 to Spark 4.1 Updates root pom.xml defaults to Spark 4.1.1, Scala 2.13.17, Parquet 1.16.0, slf4j 2.0.17, and shim sources to spark-4.1. Moves activeByDefault from spark-4.0 to spark-4.1 in spark/pom.xml. Bumps kube/Dockerfile to apache/spark:4.1.1, the docker-publish image tag, and all docs and example commands that the previous default-bump PR had updated to spark4.0_2.13. 
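A local smoke test of the bumped Dockerfile (illustrative only; the tag below
is a placeholder that follows the docker-publish workflow's naming scheme):

```shell
# Hypothetical build from the repo root, since kube/Dockerfile copies the
# source tree into the image; the -dev tag is invented for illustration.
docker build -f kube/Dockerfile \
  -t ghcr.io/apache/datafusion-comet:spark-4.1-scala-2.13-dev .
```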
--- .github/workflows/docker-publish.yml | 2 +- .../contributor-guide/benchmarking_aws_ec2.md | 2 +- .../source/contributor-guide/benchmarking_macos.md | 12 ++++++------ .../benchmarking_spark_sql_perf.md | 4 ++-- docs/source/contributor-guide/debugging.md | 2 +- .../contributor-guide/iceberg-spark-tests.md | 2 +- docs/source/user-guide/latest/datasources.md | 6 +++--- docs/source/user-guide/latest/iceberg.md | 4 ++-- docs/source/user-guide/latest/installation.md | 6 +++--- docs/source/user-guide/latest/kubernetes.md | 14 +++++++------- docs/source/user-guide/latest/source.md | 6 +++--- kube/Dockerfile | 8 ++++---- pom.xml | 14 ++++++-------- spark/pom.xml | 6 +++--- 14 files changed, 43 insertions(+), 45 deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 790cafa5f5..e3bae668c6 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -74,6 +74,6 @@ jobs: with: platforms: linux/amd64,linux/arm64 push: true - tags: ghcr.io/apache/datafusion-comet:spark-4.0-scala-2.13-${{ env.COMET_VERSION }} + tags: ghcr.io/apache/datafusion-comet:spark-4.1-scala-2.13-${{ env.COMET_VERSION }} file: kube/Dockerfile no-cache: true diff --git a/docs/source/contributor-guide/benchmarking_aws_ec2.md b/docs/source/contributor-guide/benchmarking_aws_ec2.md index bc02d2bf79..bb92a8958f 100644 --- a/docs/source/contributor-guide/benchmarking_aws_ec2.md +++ b/docs/source/contributor-guide/benchmarking_aws_ec2.md @@ -104,7 +104,7 @@ make release Set `COMET_JAR` environment variable. ```shell -export COMET_JAR=/home/ec2-user/datafusion-comet/spark/target/comet-spark-spark4.0_2.13-$COMET_VERSION.jar +export COMET_JAR=/home/ec2-user/datafusion-comet/spark/target/comet-spark-spark4.1_2.13-$COMET_VERSION.jar ``` ## Run Benchmarks diff --git a/docs/source/contributor-guide/benchmarking_macos.md b/docs/source/contributor-guide/benchmarking_macos.md index 20ec0f6f11..4c37a32a26 100644 --- a/docs/source/contributor-guide/benchmarking_macos.md +++ b/docs/source/contributor-guide/benchmarking_macos.md @@ -55,13 +55,13 @@ export DF_BENCH=`pwd` ## Install Spark -Install Apache Spark. This example refers to 4.0.2 version. +Install Apache Spark. This example refers to 4.1.1 version. ```shell -wget https://archive.apache.org/dist/spark/spark-4.0.2/spark-4.0.2-bin-hadoop3.tgz -tar xzf spark-4.0.2-bin-hadoop3.tgz -sudo mv spark-4.0.2-bin-hadoop3 /opt -export SPARK_HOME=/opt/spark-4.0.2-bin-hadoop3/ +wget https://archive.apache.org/dist/spark/spark-4.1.1/spark-4.1.1-bin-hadoop3.tgz +tar xzf spark-4.1.1-bin-hadoop3.tgz +sudo mv spark-4.1.1-bin-hadoop3 /opt +export SPARK_HOME=/opt/spark-4.1.1-bin-hadoop3/ ``` Start Spark in standalone mode: @@ -129,7 +129,7 @@ make release COMET_FEATURES=mimalloc Set `COMET_JAR` to point to the location of the Comet jar file. Example for Comet 0.8 ```shell -export COMET_JAR=`pwd`/spark/target/comet-spark-spark4.0_2.13-0.8.0-SNAPSHOT.jar +export COMET_JAR=`pwd`/spark/target/comet-spark-spark4.1_2.13-0.8.0-SNAPSHOT.jar ``` Run the following command (the `--data` parameter will need to be updated to point to your S3 bucket): diff --git a/docs/source/contributor-guide/benchmarking_spark_sql_perf.md b/docs/source/contributor-guide/benchmarking_spark_sql_perf.md index b9af6469b7..d893319443 100644 --- a/docs/source/contributor-guide/benchmarking_spark_sql_perf.md +++ b/docs/source/contributor-guide/benchmarking_spark_sql_perf.md @@ -35,7 +35,7 @@ partitioning and writing to Parquet format automatically. 
## Prerequisites - Java 17 -- Apache Spark 4.0.x +- Apache Spark 4.1.x - SBT (Scala Build Tool) - C compiler toolchain (`gcc`, `make`, `flex`, `bison`, `byacc`) @@ -225,7 +225,7 @@ Build Comet from source and launch `spark-shell` with both the Comet and spark-s ```shell make release -export COMET_JAR=$(pwd)/spark/target/comet-spark-spark4.0_2.13-*.jar +export COMET_JAR=$(pwd)/spark/target/comet-spark-spark4.1_2.13-*.jar $SPARK_HOME/bin/spark-shell \ --master $SPARK_MASTER \ diff --git a/docs/source/contributor-guide/debugging.md b/docs/source/contributor-guide/debugging.md index 2c9909c910..e5372d922d 100644 --- a/docs/source/contributor-guide/debugging.md +++ b/docs/source/contributor-guide/debugging.md @@ -136,7 +136,7 @@ make release COMET_FEATURES=backtrace Set `RUST_BACKTRACE=1` for the Spark worker/executor process, or for `spark-submit` if running in local mode. ```console -RUST_BACKTRACE=1 $SPARK_HOME/spark-shell --jars spark/target/comet-spark-spark4.0_2.13-$COMET_VERSION.jar --conf spark.plugins=org.apache.spark.CometPlugin --conf spark.comet.enabled=true --conf spark.comet.exec.enabled=true +RUST_BACKTRACE=1 $SPARK_HOME/spark-shell --jars spark/target/comet-spark-spark4.1_2.13-$COMET_VERSION.jar --conf spark.plugins=org.apache.spark.CometPlugin --conf spark.comet.enabled=true --conf spark.comet.exec.enabled=true ``` Get the expanded exception details diff --git a/docs/source/contributor-guide/iceberg-spark-tests.md b/docs/source/contributor-guide/iceberg-spark-tests.md index cd73ff089f..f37ee4f1f3 100644 --- a/docs/source/contributor-guide/iceberg-spark-tests.md +++ b/docs/source/contributor-guide/iceberg-spark-tests.md @@ -40,7 +40,7 @@ Here is an overview of the changes that the diffs make to Iceberg: Run `make release` in Comet to install the Comet JAR into the local Maven repository, specifying the Spark version. ```shell -PROFILES="-Pspark-4.0" make release +PROFILES="-Pspark-4.1" make release ``` ## 2. Clone Iceberg and Apply Diff diff --git a/docs/source/user-guide/latest/datasources.md b/docs/source/user-guide/latest/datasources.md index ef1e343cb5..5fb47fa9a9 100644 --- a/docs/source/user-guide/latest/datasources.md +++ b/docs/source/user-guide/latest/datasources.md @@ -69,12 +69,12 @@ Unlike to native Comet reader the Datafusion reader fully supports nested types To build Comet with native DataFusion reader and remote HDFS support it is required to have a JDK installed Example: -Build a Comet for `spark-4.0` provide a JDK path in `JAVA_HOME` +Build a Comet for `spark-4.1` provide a JDK path in `JAVA_HOME` Provide the JRE linker path in `RUSTFLAGS`, the path can vary depending on the system. 
Typically JRE linker is a part of installed JDK ```shell export JAVA_HOME="/opt/homebrew/opt/openjdk@17" -make release PROFILES="-Pspark-4.0" COMET_FEATURES=hdfs RUSTFLAGS="-L $JAVA_HOME/libexec/openjdk.jdk/Contents/Home/lib/server" +make release PROFILES="-Pspark-4.1" COMET_FEATURES=hdfs RUSTFLAGS="-L $JAVA_HOME/libexec/openjdk.jdk/Contents/Home/lib/server" ``` Start Comet with experimental reader and HDFS support as [described](installation.md/#run-spark-shell-with-comet-enabled) @@ -149,7 +149,7 @@ docker compose -f kube/local/hdfs-docker-compose.yml up - Build a project with HDFS support ```shell -JAVA_HOME="/opt/homebrew/opt/openjdk@17" make release PROFILES="-Pspark-4.0" COMET_FEATURES=hdfs RUSTFLAGS="-L /opt/homebrew/opt/openjdk@17/libexec/openjdk.jdk/Contents/Home/lib/server" +JAVA_HOME="/opt/homebrew/opt/openjdk@17" make release PROFILES="-Pspark-4.1" COMET_FEATURES=hdfs RUSTFLAGS="-L /opt/homebrew/opt/openjdk@17/libexec/openjdk.jdk/Contents/Home/lib/server" ``` - Run local test diff --git a/docs/source/user-guide/latest/iceberg.md b/docs/source/user-guide/latest/iceberg.md index 12418b9545..cb6fdab2c9 100644 --- a/docs/source/user-guide/latest/iceberg.md +++ b/docs/source/user-guide/latest/iceberg.md @@ -31,7 +31,7 @@ reader is enabled by default. To disable it, set `spark.comet.scan.icebergNative ```shell $SPARK_HOME/bin/spark-shell \ - --packages org.apache.datafusion:comet-spark-spark4.0_2.13:0.14.0,org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.8.1,org.apache.iceberg:iceberg-core:1.8.1 \ + --packages org.apache.datafusion:comet-spark-spark4.1_2.13:0.14.0,org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.8.1,org.apache.iceberg:iceberg-core:1.8.1 \ --repositories https://repo1.maven.org/maven2/ \ --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ --conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkCatalog \ @@ -106,7 +106,7 @@ configure Spark to use a REST catalog with Comet's native Iceberg scan: ```shell $SPARK_HOME/bin/spark-shell \ - --packages org.apache.datafusion:comet-spark-spark4.0_2.13:0.14.0,org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.8.1,org.apache.iceberg:iceberg-core:1.8.1 \ + --packages org.apache.datafusion:comet-spark-spark4.1_2.13:0.14.0,org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.8.1,org.apache.iceberg:iceberg-core:1.8.1 \ --repositories https://repo1.maven.org/maven2/ \ --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ --conf spark.sql.catalog.rest_cat=org.apache.iceberg.spark.SparkCatalog \ diff --git a/docs/source/user-guide/latest/installation.md b/docs/source/user-guide/latest/installation.md index 50bd430f10..4a6521b575 100644 --- a/docs/source/user-guide/latest/installation.md +++ b/docs/source/user-guide/latest/installation.md @@ -85,7 +85,7 @@ Here are the direct links for downloading the Comet $COMET_VERSION jar file. 
- [Comet plugin for Spark 3.5 / Scala 2.12](https://repo1.maven.org/maven2/org/apache/datafusion/comet-spark-spark3.5_2.12/$COMET_VERSION/comet-spark-spark3.5_2.12-$COMET_VERSION.jar) - [Comet plugin for Spark 3.5 / Scala 2.13](https://repo1.maven.org/maven2/org/apache/datafusion/comet-spark-spark3.5_2.13/$COMET_VERSION/comet-spark-spark3.5_2.13-$COMET_VERSION.jar) - [Comet plugin for Spark 4.0 / Scala 2.13](https://repo1.maven.org/maven2/org/apache/datafusion/comet-spark-spark4.0_2.13/$COMET_VERSION/comet-spark-spark4.0_2.13-$COMET_VERSION.jar) -- [Comet plugin for Spark 4.1 / Scala 2.13 (Experimental)](https://repo1.maven.org/maven2/org/apache/datafusion/comet-spark-spark4.1_2.13/$COMET_VERSION/comet-spark-spark4.1_2.13-$COMET_VERSION.jar) +- [Comet plugin for Spark 4.1 / Scala 2.13](https://repo1.maven.org/maven2/org/apache/datafusion/comet-spark-spark4.1_2.13/$COMET_VERSION/comet-spark-spark4.1_2.13-$COMET_VERSION.jar) - [Comet plugin for Spark 4.2 / Scala 2.13 (Experimental)](https://repo1.maven.org/maven2/org/apache/datafusion/comet-spark-spark4.2_2.13/$COMET_VERSION/comet-spark-spark4.2_2.13-$COMET_VERSION.jar) @@ -105,7 +105,7 @@ See the [Comet Kubernetes Guide](kubernetes.md) guide. Make sure `SPARK_HOME` points to the same Spark version as Comet was built for. ```shell -export COMET_JAR=spark/target/comet-spark-spark4.0_2.13-$COMET_VERSION.jar +export COMET_JAR=spark/target/comet-spark-spark4.1_2.13-$COMET_VERSION.jar $SPARK_HOME/bin/spark-shell \ --jars $COMET_JAR \ @@ -161,7 +161,7 @@ explicitly contain Comet otherwise Spark may use a different class-loader for th components which will then fail at runtime. For example: ``` ---driver-class-path spark/target/comet-spark-spark4.0_2.13-$COMET_VERSION.jar +--driver-class-path spark/target/comet-spark-spark4.1_2.13-$COMET_VERSION.jar ``` Some cluster managers may require additional configuration, see diff --git a/docs/source/user-guide/latest/kubernetes.md b/docs/source/user-guide/latest/kubernetes.md index 718da49d32..fd84b7ad9b 100644 --- a/docs/source/user-guide/latest/kubernetes.md +++ b/docs/source/user-guide/latest/kubernetes.md @@ -69,30 +69,30 @@ metadata: spec: type: Scala mode: cluster - image: apache/datafusion-comet:0.7.0-spark4.0.2-scala2.13-java17 + image: apache/datafusion-comet:0.7.0-spark4.1.1-scala2.13-java17 imagePullPolicy: IfNotPresent mainClass: org.apache.spark.examples.SparkPi - mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.13-4.0.2.jar + mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.13-4.1.1.jar sparkConf: - "spark.executor.extraClassPath": "/opt/spark/jars/comet-spark-spark4.0_2.13-0.7.0.jar" - "spark.driver.extraClassPath": "/opt/spark/jars/comet-spark-spark4.0_2.13-0.7.0.jar" + "spark.executor.extraClassPath": "/opt/spark/jars/comet-spark-spark4.1_2.13-0.7.0.jar" + "spark.driver.extraClassPath": "/opt/spark/jars/comet-spark-spark4.1_2.13-0.7.0.jar" "spark.plugins": "org.apache.spark.CometPlugin" "spark.comet.enabled": "true" "spark.comet.exec.enabled": "true" "spark.comet.exec.shuffle.enabled": "true" "spark.comet.exec.shuffle.mode": "auto" "spark.shuffle.manager": "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager" - sparkVersion: 4.0.2 + sparkVersion: 4.1.1 driver: labels: - version: 4.0.2 + version: 4.1.1 cores: 1 coreLimit: 1200m memory: 512m serviceAccount: spark-operator-spark executor: labels: - version: 4.0.2 + version: 4.1.1 instances: 1 cores: 1 coreLimit: 1200m diff --git a/docs/source/user-guide/latest/source.md 
b/docs/source/user-guide/latest/source.md index 5ad33ecfe1..6ae43be56a 100644 --- a/docs/source/user-guide/latest/source.md +++ b/docs/source/user-guide/latest/source.md @@ -38,7 +38,7 @@ cd apache-datafusion-comet-$COMET_VERSION Build ```console -make release-nogit PROFILES="-Pspark-4.0" +make release-nogit PROFILES="-Pspark-4.1" ``` ## Building from the GitHub repository @@ -53,7 +53,7 @@ Build Comet for a specific Spark version: ```console cd datafusion-comet -make release PROFILES="-Pspark-4.0" +make release PROFILES="-Pspark-4.1" ``` Note that the project builds for Scala 2.13 by default but can be built for Scala 2.12 using an additional profile: @@ -65,5 +65,5 @@ make release PROFILES="-Pspark-3.5 -Pscala-2.12" To build Comet from the source distribution on an isolated environment without an access to `github.com` it is necessary to disable `git-commit-id-maven-plugin`, otherwise you will face errors that there is no access to the git during the build process. In that case you may use: ```console -make release-nogit PROFILES="-Pspark-4.0" +make release-nogit PROFILES="-Pspark-4.1" ``` diff --git a/kube/Dockerfile b/kube/Dockerfile index a078277407..d13b401a01 100644 --- a/kube/Dockerfile +++ b/kube/Dockerfile @@ -15,7 +15,7 @@ # limitations under the License. # -FROM apache/spark:4.0.2 AS builder +FROM apache/spark:4.1.1 AS builder USER root @@ -37,7 +37,7 @@ ENV PATH="$PATH:/root/.local/bin" RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y ENV PATH="/root/.cargo/bin:${PATH}" ENV RUSTFLAGS="-C debuginfo=line-tables-only -C incremental=false" -ENV SPARK_VERSION=4.0 +ENV SPARK_VERSION=4.1 ENV SCALA_VERSION=2.13 # copy source files to Docker image @@ -70,8 +70,8 @@ RUN mkdir -p /root/.m2 && \ RUN cd /comet \ && JAVA_HOME=$(readlink -f $(which javac) | sed "s/\/bin\/javac//") make release-nogit PROFILES="-Pspark-$SPARK_VERSION -Pscala-$SCALA_VERSION" -FROM apache/spark:4.0.2 -ENV SPARK_VERSION=4.0 +FROM apache/spark:4.1.1 +ENV SPARK_VERSION=4.1 ENV SCALA_VERSION=2.13 USER root diff --git a/pom.xml b/pom.xml index 5199c8a453..f378bee7a8 100644 --- a/pom.xml +++ b/pom.xml @@ -65,16 +65,16 @@ under the License. 1.7.0 3.6.1 0.16.1 - 2.13.16 + 2.13.17 2.13 4.9.6 3.2.16 2.2.0 - 4.0.2 - 4.0 + 4.1.1 + 4.1 provided 3.25.5 - 1.15.2 + 1.16.0 provided 3.3.4 18.3.0 @@ -82,7 +82,7 @@ under the License. 2.43.0 0.8.11 4.13.6 - 2.0.16 + 2.0.17 33.2.1-jre 1.21.0 2.31.51 @@ -117,7 +117,7 @@ under the License. -ea -Xmx4g -Xss4m ${extraJavaTestArgs} spark-4.x - spark-4.0 + spark-4.1 @@ -686,7 +686,6 @@ under the License. - spark-4.1 17 ${java.version} ${java.version} diff --git a/spark/pom.xml b/spark/pom.xml index fc0fc5c99a..f36a132efc 100644 --- a/spark/pom.xml +++ b/spark/pom.xml @@ -248,9 +248,6 @@ under the License. spark-4.0 - - true - org.apache.iceberg @@ -275,6 +272,9 @@ under the License. spark-4.1 + + true + From 7f0b1eb3bf831961d7a3f437c0da300068353e99 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 6 May 2026 16:46:39 -0600 Subject: [PATCH 5/6] ci: add JAVA_TOOL_OPTIONS to TPC-H/TPC-DS verify jobs for JDK 17 The TPC-H data generation step launches Spark via `mvnw exec:java`, which falls outside surefire's argLine, so the --add-opens flags from pom.xml do not apply. With this PR bumping these jobs from JDK 11 to JDK 17, GenTPCHData hangs in shuffle when Kryo reflectively probes java.nio.ByteBuffer.hb. Set JAVA_TOOL_OPTIONS at the job level so both exec:java and surefire forks get the required flags. 
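For illustration (not part of the patch): JAVA_TOOL_OPTIONS is read by every
HotSpot JVM at startup, which is why it reaches the Spark driver launched via
`mvnw exec:java` as well as surefire's forked test JVMs:

```shell
# Any JVM started in this environment inherits the flags and confirms it
# by printing "Picked up JAVA_TOOL_OPTIONS: ..." on stderr.
export JAVA_TOOL_OPTIONS="--add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED"
java -version
```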
--- .github/workflows/pr_build_linux.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/pr_build_linux.yml b/.github/workflows/pr_build_linux.yml index b8b025f0db..437ba49dda 100644 --- a/.github/workflows/pr_build_linux.yml +++ b/.github/workflows/pr_build_linux.yml @@ -451,6 +451,8 @@ jobs: runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a+m7a+c8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion-comet', github.run_id) || 'ubuntu-latest' }} container: image: amd64/rust + env: + JAVA_TOOL_OPTIONS: --add-exports=java.base/sun.nio.ch=ALL-UNNAMED --add-exports=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED steps: - uses: runs-on/action@742bf56072eb4845a0f94b3394673e4903c90ff0 # v2.1.0 @@ -505,6 +507,8 @@ jobs: runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a+m7a+c8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion-comet', github.run_id) || 'ubuntu-latest' }} container: image: amd64/rust + env: + JAVA_TOOL_OPTIONS: --add-exports=java.base/sun.nio.ch=ALL-UNNAMED --add-exports=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED strategy: matrix: join: [sort_merge, broadcast, hash] From 185b69c342c6d4dd81cef914b3e1ddf541055eee Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 6 May 2026 17:53:23 -0600 Subject: [PATCH 6/6] ci: expand JAVA_TOOL_OPTIONS to cover java.lang.invoke for TPC-H/TPC-DS jobs The previous flag set only opened java.lang and java.nio, but with the default profile now Spark 4.1 / Scala 2.13 the TPC-H GenTPCHData run trips on SerializedLambda.capturingClass and dies with InaccessibleObjectException for java.lang.invoke. Mirror the full list from pom.xml's extraJavaTestArgs so any other reflection-driven access into java.base also has the matching --add-opens grant. 
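To keep the workflow value from drifting out of sync with the pom, the list
can be regenerated instead of hand-copied; a sketch, assuming a recent
maven-help-plugin:

```shell
# Resolve extraJavaTestArgs from the root pom and flatten it onto one line;
# the --add-exports/--add-opens entries are the ones mirrored into the
# JAVA_TOOL_OPTIONS value below.
./mvnw help:evaluate -Dexpression=extraJavaTestArgs -q -DforceStdout | xargs
```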
--- .github/workflows/pr_build_linux.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pr_build_linux.yml b/.github/workflows/pr_build_linux.yml index 437ba49dda..2826aeeecc 100644 --- a/.github/workflows/pr_build_linux.yml +++ b/.github/workflows/pr_build_linux.yml @@ -452,7 +452,7 @@ jobs: container: image: amd64/rust env: - JAVA_TOOL_OPTIONS: --add-exports=java.base/sun.nio.ch=ALL-UNNAMED --add-exports=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED + JAVA_TOOL_OPTIONS: --add-exports=java.base/sun.nio.ch=ALL-UNNAMED --add-exports=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED steps: - uses: runs-on/action@742bf56072eb4845a0f94b3394673e4903c90ff0 # v2.1.0 @@ -508,7 +508,7 @@ jobs: container: image: amd64/rust env: - JAVA_TOOL_OPTIONS: --add-exports=java.base/sun.nio.ch=ALL-UNNAMED --add-exports=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED + JAVA_TOOL_OPTIONS: --add-exports=java.base/sun.nio.ch=ALL-UNNAMED --add-exports=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED strategy: matrix: join: [sort_merge, broadcast, hash]
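With the series applied, the new no-profile defaults can be spot-checked; a
sketch, assuming a recent maven-help-plugin and the property names described
in the commit messages above:

```shell
# With no -P flags these should now resolve to 4.1.1 and 2.13, matching
# the root pom.xml defaults introduced by this series.
./mvnw help:evaluate -Dexpression=spark.version -q -DforceStdout
./mvnw help:evaluate -Dexpression=scala.binary.version -q -DforceStdout
```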