From 2c32f92b2b42a328cdf6ab8297195d18c31d4ac0 Mon Sep 17 00:00:00 2001
From: SS-JIA
Date: Fri, 29 May 2026 15:29:54 -0400
Subject: [PATCH] Skip AOTI tests on macOS CI and bump job timeout to 120 min

Summary:
AOTI tests (llama3_2_vision and select extension/llm tests) hang
indefinitely on macOS CI runners after the PyTorch 2.12 pin update. The
hang is in native C/C++ code (inductor compilation / dlopen), which
prevents faulthandler from producing a traceback. Diagnosis is ongoing
in #19886.

Skip the affected tests and bump the macOS job timeout from the default
90 to 120 minutes to add margin (observed completion at ~79 min with
skips applied).

Co-Authored-By: Claude
---
 .ci/scripts/unittest-macos-cmake.sh | 15 +++++++++++++--
 .github/workflows/_unittest.yml     |  1 +
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/.ci/scripts/unittest-macos-cmake.sh b/.ci/scripts/unittest-macos-cmake.sh
index 43eb1f21c3c..48f072a0cc1 100755
--- a/.ci/scripts/unittest-macos-cmake.sh
+++ b/.ci/scripts/unittest-macos-cmake.sh
@@ -12,8 +12,19 @@ set -eux
 export TORCHINDUCTOR_CACHE_DIR="$(mktemp -d "${RUNNER_TEMP:-/tmp}/torchinductor_cache_XXXXXX")"
 trap 'rm -rf "${TORCHINDUCTOR_CACHE_DIR}"' EXIT
 
-# Run pytest with coverage
-${CONDA_RUN} pytest -n auto --cov=./ --cov-report=xml
+# TODO(SS-JIA): AOTI tests hang on macOS CI runners — the thread blocks in
+# native C/C++ code (dlopen / inductor compilation) so faulthandler cannot
+# even produce a traceback. Diagnosis ongoing in #19886.
+AOTI_SKIPS=(
+  --ignore=examples/models/llama3_2_vision/preprocess/test_preprocess.py
+  --ignore=examples/models/llama3_2_vision/vision_encoder/test/test_vision_encoder.py
+  --ignore=examples/models/llama3_2_vision/text_decoder/test/test_text_decoder.py
+  --deselect=extension/llm/modules/test/test_position_embeddings.py::TilePositionalEmbeddingTest::test_tile_positional_embedding_aoti
+  --deselect=extension/llm/modules/test/test_position_embeddings.py::TiledTokenPositionalEmbeddingTest::test_tiled_token_positional_embedding_aoti
+  --deselect=extension/llm/modules/test/test_attention.py::AttentionTest::test_attention_aoti
+)
+
+${CONDA_RUN} pytest -n auto --cov=./ --cov-report=xml "${AOTI_SKIPS[@]}"
 # Run gtest
 LLVM_PROFDATA="xcrun llvm-profdata" LLVM_COV="xcrun llvm-cov" \
 ${CONDA_RUN} test/run_oss_cpp_tests.sh
diff --git a/.github/workflows/_unittest.yml b/.github/workflows/_unittest.yml
index 15c87bd79e4..a253857d2c0 100644
--- a/.github/workflows/_unittest.yml
+++ b/.github/workflows/_unittest.yml
@@ -49,6 +49,7 @@ jobs:
       python-version: '3.11'
       submodules: 'recursive'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 120
       script: |
         set -eux
         # This is needed to get the prebuilt PyTorch wheel from S3