From 4ede53ab2b2da6f578ab0bfc832e7022639d3d26 Mon Sep 17 00:00:00 2001
From: Sebastian Baunsgaard
Date: Mon, 8 Jun 2026 23:04:24 +0000
Subject: [PATCH] Retry Maven test-compile on transient repository download errors

CI jobs intermittently fail at startup when Maven Central returns a
transient error (e.g. HTTP 403 while resolving the Apache parent POM),
which is unrelated to the code under test. Retry test-compile once after
a short pause when the failure is a "Could not transfer artifact"
download error, while genuine compilation and test failures still fail
fast.

Also point the test action at the entrypoint script in the mounted
workspace so changes to docker/entrypoint.sh take effect immediately
without rebuilding and republishing apache/systemds:testing-latest.

Failing job that motivated this change:
https://github.com/apache/systemds/actions/runs/27167144065/job/80197161169
---
 .github/action/action.yml |  4 ++++
 docker/entrypoint.sh      | 23 ++++++++++++++++++++++-
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/.github/action/action.yml b/.github/action/action.yml
index 57454cf0510..406367567a0 100644
--- a/.github/action/action.yml
+++ b/.github/action/action.yml
@@ -28,5 +28,9 @@ inputs:
 runs:
   using: 'docker'
   image: 'Dockerfile'
+  # Run the entrypoint from the mounted workspace rather than the copy baked
+  # into the image, so changes to docker/entrypoint.sh take effect immediately
+  # without rebuilding and republishing apache/systemds:testing-latest.
+  entrypoint: '/github/workspace/docker/entrypoint.sh'
   args:
     - ${{ inputs.test-to-run }}
diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh
index 9a77766b3a3..53dfabb96e6 100755
--- a/docker/entrypoint.sh
+++ b/docker/entrypoint.sh
@@ -28,8 +28,29 @@ cd /github/workspace
 
 export MAVEN_OPTS="-Xmx512m"
 
+# Printed when Maven fails to download an artifact (transient repo/network
+# error), unlike genuine compilation or test failures which fail fast.
+transient_mvn_error="Could not transfer artifact"
+
 log="/tmp/sysdstest.log"
-mvn -ntp -B test-compile 2>&1 | stdbuf -oL grep -E "BUILD|Total time:|---|Building SystemDS"
+compile_log="$(mktemp)"
+# test-compile downloads all dependencies; retry once on a transient repo
+# error so the test run below can resolve them from the local cache.
+mvn -ntp -B test-compile 2>&1 | tee "$compile_log" | stdbuf -oL grep -E "BUILD|Total time:|---|Building SystemDS"
+compile_status=${PIPESTATUS[0]}
+
+# True only when test-compile failed because of a transient repository download.
+compile_transient_failure=false
+[ "$compile_status" -ne 0 ] && grep -qE "$transient_mvn_error" "$compile_log" && compile_transient_failure=true
+rm -f "$compile_log"
+
+if [ "$compile_transient_failure" = true ]; then
+  echo "Transient Maven repository error; retrying test-compile in 15s..."
+  sleep 15
+  mvn -ntp -B test-compile 2>&1 | stdbuf -oL grep -E "BUILD|Total time:|---|Building SystemDS"
+else
+  echo "No transient Maven repository error detected; no retry needed."
+fi
 mvn -ntp -B test -D maven.test.skip=false -D automatedtestbase.outputbuffering=true -D test=$1 2>&1 \
 	| stdbuf -oL grep -Ev "already exists in destination.|Using incubator" \
 	| tee $log