diff --git a/.github/workflows/iceberg_spark_test_1_10.yml b/.github/workflows/iceberg_spark_test_1_10.yml new file mode 100644 index 0000000000..8df8a22c74 --- /dev/null +++ b/.github/workflows/iceberg_spark_test_1_10.yml @@ -0,0 +1,77 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Runs on every PR and on main. Latest supported Iceberg version, paired with +# Spark 3.5. 
+name: Iceberg Spark SQL Tests (Iceberg 1.10) + +concurrency: + group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} + cancel-in-progress: true + +on: + push: + branches: + - main + paths: + - "native/**/src/**" + - "native/**/Cargo.toml" + - "native/Cargo.lock" + - "!native/hdfs/**" + - "!native/fs-hdfs/**" + - "common/src/main/**" + - "common/pom.xml" + - "spark/src/main/**" + - "!spark/src/main/scala/org/apache/comet/GenerateDocs.scala" + - "spark/pom.xml" + - "dev/diffs/iceberg/**" + - "pom.xml" + - "rust-toolchain.toml" + - ".github/workflows/iceberg_spark_test_1_10.yml" + - ".github/workflows/iceberg_spark_test_reusable.yml" + - ".github/actions/setup-builder/**" + - ".github/actions/setup-iceberg-builder/**" + pull_request: + paths: + - "native/**/src/**" + - "native/**/Cargo.toml" + - "native/Cargo.lock" + - "!native/hdfs/**" + - "!native/fs-hdfs/**" + - "common/src/main/**" + - "common/pom.xml" + - "spark/src/main/**" + - "!spark/src/main/scala/org/apache/comet/GenerateDocs.scala" + - "spark/pom.xml" + - "dev/diffs/iceberg/**" + - "pom.xml" + - "rust-toolchain.toml" + - ".github/workflows/iceberg_spark_test_1_10.yml" + - ".github/workflows/iceberg_spark_test_reusable.yml" + - ".github/actions/setup-builder/**" + - ".github/actions/setup-iceberg-builder/**" + workflow_dispatch: + +jobs: + iceberg-spark: + uses: ./.github/workflows/iceberg_spark_test_reusable.yml + with: + iceberg-short: '1.10' + iceberg-full: '1.10.0' + spark-short: '3.5' + spark-full: '3.5.8' + java: 17 diff --git a/.github/workflows/iceberg_spark_test_1_8.yml b/.github/workflows/iceberg_spark_test_1_8.yml new file mode 100644 index 0000000000..111d1af46a --- /dev/null +++ b/.github/workflows/iceberg_spark_test_1_8.yml @@ -0,0 +1,57 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Runs on main only. Oldest supported Iceberg version, paired with Spark 3.4. +name: Iceberg Spark SQL Tests (Iceberg 1.8) + +concurrency: + group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} + cancel-in-progress: true + +on: + push: + branches: + - main + paths: + - "native/**/src/**" + - "native/**/Cargo.toml" + - "native/Cargo.lock" + - "!native/hdfs/**" + - "!native/fs-hdfs/**" + - "common/src/main/**" + - "common/pom.xml" + - "spark/src/main/**" + - "!spark/src/main/scala/org/apache/comet/GenerateDocs.scala" + - "spark/pom.xml" + - "dev/diffs/iceberg/**" + - "pom.xml" + - "rust-toolchain.toml" + - ".github/workflows/iceberg_spark_test_1_8.yml" + - ".github/workflows/iceberg_spark_test_reusable.yml" + - ".github/actions/setup-builder/**" + - ".github/actions/setup-iceberg-builder/**" + workflow_dispatch: + +jobs: + iceberg-spark: + uses: ./.github/workflows/iceberg_spark_test_reusable.yml + with: + iceberg-short: '1.8' + iceberg-full: '1.8.1' + spark-short: '3.4' + spark-full: '3.4.3' + java: 11 diff --git a/.github/workflows/iceberg_spark_test_1_9.yml b/.github/workflows/iceberg_spark_test_1_9.yml new file mode 100644 index 0000000000..f8a2361cf0 --- /dev/null +++ b/.github/workflows/iceberg_spark_test_1_9.yml @@ -0,0 +1,57 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Runs on main only. Iceberg 1.9 paired with Spark 3.5. +name: Iceberg Spark SQL Tests (Iceberg 1.9) + +concurrency: + group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} + cancel-in-progress: true + +on: + push: + branches: + - main + paths: + - "native/**/src/**" + - "native/**/Cargo.toml" + - "native/Cargo.lock" + - "!native/hdfs/**" + - "!native/fs-hdfs/**" + - "common/src/main/**" + - "common/pom.xml" + - "spark/src/main/**" + - "!spark/src/main/scala/org/apache/comet/GenerateDocs.scala" + - "spark/pom.xml" + - "dev/diffs/iceberg/**" + - "pom.xml" + - "rust-toolchain.toml" + - ".github/workflows/iceberg_spark_test_1_9.yml" + - ".github/workflows/iceberg_spark_test_reusable.yml" + - ".github/actions/setup-builder/**" + - ".github/actions/setup-iceberg-builder/**" + workflow_dispatch: + +jobs: + iceberg-spark: + uses: ./.github/workflows/iceberg_spark_test_reusable.yml + with: + iceberg-short: '1.9' + iceberg-full: '1.9.1' + spark-short: '3.5' + spark-full: '3.5.8' + java: 17 diff --git a/.github/workflows/iceberg_spark_test.yml b/.github/workflows/iceberg_spark_test_reusable.yml similarity index 50% rename from .github/workflows/iceberg_spark_test.yml rename to .github/workflows/iceberg_spark_test_reusable.yml index 6834fbfe18..56827005aa 
100644 --- a/.github/workflows/iceberg_spark_test.yml +++ b/.github/workflows/iceberg_spark_test_reusable.yml @@ -15,54 +15,40 @@ # specific language governing permissions and limitations # under the License. -name: Iceberg Spark SQL Tests +# Reusable Iceberg Spark test workflow. Invoked once per Iceberg version by +# the iceberg_spark_test_<version>.yml caller workflows. Keep all job logic +# here so the per-version callers stay thin. -concurrency: - group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} - cancel-in-progress: true +name: Iceberg Spark SQL Tests (reusable) on: - push: - branches: - - main - paths: - - "native/**/src/**" - - "native/**/Cargo.toml" - - "native/Cargo.lock" - - "!native/hdfs/**" - - "!native/fs-hdfs/**" - - "common/src/main/**" - - "common/pom.xml" - - "spark/src/main/**" - - "!spark/src/main/scala/org/apache/comet/GenerateDocs.scala" - - "spark/pom.xml" - - "dev/diffs/iceberg/**" - - "pom.xml" - - "rust-toolchain.toml" - - ".github/workflows/iceberg_spark_test.yml" - - ".github/actions/setup-builder/**" - - ".github/actions/setup-iceberg-builder/**" - pull_request: - paths: - - "native/**/src/**" - - "native/**/Cargo.toml" - - "native/Cargo.lock" - - "!native/hdfs/**" - - "!native/fs-hdfs/**" - - "common/src/main/**" - - "common/pom.xml" - - "spark/src/main/**" - - "!spark/src/main/scala/org/apache/comet/GenerateDocs.scala" - - "spark/pom.xml" - - "dev/diffs/iceberg/**" - - "pom.xml" - - "rust-toolchain.toml" - - ".github/workflows/iceberg_spark_test.yml" - - ".github/actions/setup-builder/**" - - ".github/actions/setup-iceberg-builder/**" - # manual trigger - # https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow - workflow_dispatch: + workflow_call: + inputs: + iceberg-short: + description: 'Iceberg minor version, e.g. 1.10' + required: true + type: string + iceberg-full: + description: 'Iceberg full version, e.g. 
1.10.0' + required: true + type: string + spark-short: + description: 'Spark minor version, e.g. 3.5' + required: true + type: string + spark-full: + description: 'Spark full version, e.g. 3.5.8' + required: true + type: string + java: + description: 'JDK major version, e.g. 17' + required: true + type: number + scala: + description: 'Scala minor version, e.g. 2.13' + required: false + default: '2.13' + type: string env: RUST_VERSION: stable @@ -125,20 +111,8 @@ jobs: iceberg-spark: needs: build-native - strategy: - matrix: - os: [ubuntu-24.04] - iceberg-version: [{short: '1.8', full: '1.8.1'}, {short: '1.9', full: '1.9.1'}, {short: '1.10', full: '1.10.0'}] - spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.8'}] - scala-version: ['2.13'] - include: - - spark-version: {short: '3.4', full: '3.4.3'} - java-version: 11 - - spark-version: {short: '3.5', full: '3.5.8'} - java-version: 17 - fail-fast: false - name: iceberg-spark/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }} - runs-on: ${{ matrix.os }} + name: iceberg-spark/iceberg-${{ inputs.iceberg-full }}/spark-${{ inputs.spark-full }}/scala-${{ inputs.scala }}/java-${{ inputs.java }} + runs-on: ubuntu-24.04 container: image: amd64/rust env: @@ -148,8 +122,8 @@ jobs: - name: Setup Rust & Java toolchain uses: ./.github/actions/setup-builder with: - rust-version: ${{env.RUST_VERSION}} - jdk-version: ${{ matrix.java-version }} + rust-version: ${{ env.RUST_VERSION }} + jdk-version: ${{ inputs.java }} - name: Download native library uses: actions/download-artifact@v8 with: @@ -157,35 +131,23 @@ jobs: path: native/target/release/ - name: Build Comet run: | - ./mvnw install -Prelease -DskipTests -Pspark-${{ matrix.spark-version.short }} -Pscala-${{ matrix.scala-version }} + ./mvnw install -Prelease -DskipTests -Pspark-${{ inputs.spark-short }} -Pscala-${{ inputs.scala }} - name: Setup 
Iceberg uses: ./.github/actions/setup-iceberg-builder with: - iceberg-version: ${{ matrix.iceberg-version.full }} + iceberg-version: ${{ inputs.iceberg-full }} - name: Run Iceberg Spark tests run: | cd apache-iceberg rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups - ENABLE_COMET=true ENABLE_COMET_ONHEAP=true ./gradlew -DsparkVersions=${{ matrix.spark-version.short }} -DscalaVersion=${{ matrix.scala-version }} -DflinkVersions= -DkafkaVersions= \ - :iceberg-spark:iceberg-spark-${{ matrix.spark-version.short }}_${{ matrix.scala-version }}:test \ + ENABLE_COMET=true ENABLE_COMET_ONHEAP=true ./gradlew -DsparkVersions=${{ inputs.spark-short }} -DscalaVersion=${{ inputs.scala }} -DflinkVersions= -DkafkaVersions= \ + :iceberg-spark:iceberg-spark-${{ inputs.spark-short }}_${{ inputs.scala }}:test \ -Pquick=true -x javadoc iceberg-spark-extensions: needs: build-native - strategy: - matrix: - os: [ubuntu-24.04] - iceberg-version: [{short: '1.8', full: '1.8.1'}, {short: '1.9', full: '1.9.1'}, {short: '1.10', full: '1.10.0'}] - spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.8'}] - scala-version: ['2.13'] - include: - - spark-version: {short: '3.4', full: '3.4.3'} - java-version: 11 - - spark-version: {short: '3.5', full: '3.5.8'} - java-version: 17 - fail-fast: false - name: iceberg-spark-extensions/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }} - runs-on: ${{ matrix.os }} + name: iceberg-spark-extensions/iceberg-${{ inputs.iceberg-full }}/spark-${{ inputs.spark-full }}/scala-${{ inputs.scala }}/java-${{ inputs.java }} + runs-on: ubuntu-24.04 container: image: amd64/rust env: @@ -195,8 +157,8 @@ jobs: - name: Setup Rust & Java toolchain uses: ./.github/actions/setup-builder with: - rust-version: ${{env.RUST_VERSION}} - jdk-version: ${{ matrix.java-version }} + rust-version: ${{ env.RUST_VERSION 
}} + jdk-version: ${{ inputs.java }} - name: Download native library uses: actions/download-artifact@v8 with: @@ -204,35 +166,23 @@ jobs: path: native/target/release/ - name: Build Comet run: | - ./mvnw install -Prelease -DskipTests -Pspark-${{ matrix.spark-version.short }} -Pscala-${{ matrix.scala-version }} + ./mvnw install -Prelease -DskipTests -Pspark-${{ inputs.spark-short }} -Pscala-${{ inputs.scala }} - name: Setup Iceberg uses: ./.github/actions/setup-iceberg-builder with: - iceberg-version: ${{ matrix.iceberg-version.full }} + iceberg-version: ${{ inputs.iceberg-full }} - name: Run Iceberg Spark extensions tests run: | cd apache-iceberg rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups - ENABLE_COMET=true ENABLE_COMET_ONHEAP=true ./gradlew -DsparkVersions=${{ matrix.spark-version.short }} -DscalaVersion=${{ matrix.scala-version }} -DflinkVersions= -DkafkaVersions= \ - :iceberg-spark:iceberg-spark-extensions-${{ matrix.spark-version.short }}_${{ matrix.scala-version }}:test \ + ENABLE_COMET=true ENABLE_COMET_ONHEAP=true ./gradlew -DsparkVersions=${{ inputs.spark-short }} -DscalaVersion=${{ inputs.scala }} -DflinkVersions= -DkafkaVersions= \ + :iceberg-spark:iceberg-spark-extensions-${{ inputs.spark-short }}_${{ inputs.scala }}:test \ -Pquick=true -x javadoc iceberg-spark-runtime: needs: build-native - strategy: - matrix: - os: [ubuntu-24.04] - iceberg-version: [{short: '1.8', full: '1.8.1'}, {short: '1.9', full: '1.9.1'}, {short: '1.10', full: '1.10.0'}] - spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.8'}] - scala-version: ['2.13'] - include: - - spark-version: {short: '3.4', full: '3.4.3'} - java-version: 11 - - spark-version: {short: '3.5', full: '3.5.8'} - java-version: 17 - fail-fast: false - name: iceberg-spark-runtime/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }} - 
runs-on: ${{ matrix.os }} + name: iceberg-spark-runtime/iceberg-${{ inputs.iceberg-full }}/spark-${{ inputs.spark-full }}/scala-${{ inputs.scala }}/java-${{ inputs.java }} + runs-on: ubuntu-24.04 container: image: amd64/rust env: @@ -242,8 +192,8 @@ jobs: - name: Setup Rust & Java toolchain uses: ./.github/actions/setup-builder with: - rust-version: ${{env.RUST_VERSION}} - jdk-version: ${{ matrix.java-version }} + rust-version: ${{ env.RUST_VERSION }} + jdk-version: ${{ inputs.java }} - name: Download native library uses: actions/download-artifact@v8 with: @@ -251,15 +201,15 @@ jobs: path: native/target/release/ - name: Build Comet run: | - ./mvnw install -Prelease -DskipTests -Pspark-${{ matrix.spark-version.short }} -Pscala-${{ matrix.scala-version }} + ./mvnw install -Prelease -DskipTests -Pspark-${{ inputs.spark-short }} -Pscala-${{ inputs.scala }} - name: Setup Iceberg uses: ./.github/actions/setup-iceberg-builder with: - iceberg-version: ${{ matrix.iceberg-version.full }} + iceberg-version: ${{ inputs.iceberg-full }} - name: Run Iceberg Spark runtime tests run: | cd apache-iceberg rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups - ENABLE_COMET=true ENABLE_COMET_ONHEAP=true ./gradlew -DsparkVersions=${{ matrix.spark-version.short }} -DscalaVersion=${{ matrix.scala-version }} -DflinkVersions= -DkafkaVersions= \ - :iceberg-spark:iceberg-spark-runtime-${{ matrix.spark-version.short }}_${{ matrix.scala-version }}:integrationTest \ + ENABLE_COMET=true ENABLE_COMET_ONHEAP=true ./gradlew -DsparkVersions=${{ inputs.spark-short }} -DscalaVersion=${{ inputs.scala }} -DflinkVersions= -DkafkaVersions= \ + :iceberg-spark:iceberg-spark-runtime-${{ inputs.spark-short }}_${{ inputs.scala }}:integrationTest \ -Pquick=true -x javadoc diff --git a/docs/source/contributor-guide/iceberg-spark-tests.md b/docs/source/contributor-guide/iceberg-spark-tests.md index f37ee4f1f3..fddf7051cd 100644 --- 
a/docs/source/contributor-guide/iceberg-spark-tests.md +++ b/docs/source/contributor-guide/iceberg-spark-tests.md @@ -93,6 +93,8 @@ diff must be generated against its own tag. ## Running Tests in CI -The `iceberg_spark_test.yml` workflow applies these diffs and runs the three Gradle targets above against -each Iceberg version. The test matrix covers Spark 3.4 and 3.5 across Iceberg 1.8.1, 1.9.1, and 1.10.0 -with Java 11 and 17. The workflow runs on all pull requests and pushes to the main branch. +The `iceberg_spark_test_<version>.yml` workflows apply these diffs and run the three Gradle targets above +against each Iceberg version. Iceberg 1.8.1 runs against Spark 3.4.3 with Java 11; Iceberg 1.9.1 and 1.10.0 +run against Spark 3.5.8 with Java 17. The latest Iceberg version (1.10) runs on every pull request and on +pushes to main; the older versions (1.8, 1.9) run only on pushes to main. All caller workflows delegate to +`iceberg_spark_test_reusable.yml`, which holds the build and test job logic.