# Patch summary (free text ahead of the first `diff --git` header):
#
#   1. .ci/docker/requirements-ci.txt
#        Adds the pin `pytest-timeout==2.2.0` next to the existing pytest pins.
#   2. .ci/scripts/unittest-macos-cmake.sh
#        Replaces `pytest -n auto --cov=./ --cov-report=xml` with a run that
#        disables the xdist plugin (`-p no:xdist`), enables `-v`, and passes
#        `--timeout=1500 --timeout-method=thread` plus
#        `-o faulthandler_timeout=180`. The coverage flags and the gtest step
#        that follows are left as they were.
#   3. .github/workflows/_unittest.yml
#        Adds `timeout: 90` beside the `ref:` and `script:` keys.
#
# NOTE(review): the patch's own comment labels the pytest change "EXPERIMENT";
#   presumably it is meant to be reverted once the hanging test is identified
#   -- confirm before merging.
# NOTE(review): the `--timeout` option comes from pytest-timeout, which this
#   patch only adds to the docker requirements file; how the macOS job installs
#   its Python dependencies is not visible here -- verify the plugin is
#   available to `${CONDA_RUN} pytest`.
# NOTE(review): units are not stated in the patch. 1500 and 180 are presumably
#   seconds and 90 presumably minutes -- confirm against the pytest-timeout,
#   pytest, and reusable-workflow documentation.
# NOTE(review): in this copy the patch's line breaks appear collapsed into
#   spaces (all three file diffs sit on one line), so hunk line boundaries and
#   YAML indentation cannot be recovered from here; restore the original
#   newlines before feeding this to `git apply` or `patch`.
diff --git a/.ci/docker/requirements-ci.txt b/.ci/docker/requirements-ci.txt index c82882d56e6..a29232c6386 100644 --- a/.ci/docker/requirements-ci.txt +++ b/.ci/docker/requirements-ci.txt @@ -11,6 +11,7 @@ zstd==1.5.5.1 pandas>=2.2.2; python_version >= '3.10' pytest==7.2.0 pytest-cov==4.1.0 +pytest-timeout==2.2.0 expecttest==0.1.6 hypothesis==6.84.2 parameterized==0.9.0 diff --git a/.ci/scripts/unittest-macos-cmake.sh b/.ci/scripts/unittest-macos-cmake.sh index 43eb1f21c3c..e4072c30db1 100755 --- a/.ci/scripts/unittest-macos-cmake.sh +++ b/.ci/scripts/unittest-macos-cmake.sh @@ -12,8 +12,13 @@ set -eux export TORCHINDUCTOR_CACHE_DIR="$(mktemp -d "${RUNNER_TEMP:-/tmp}/torchinductor_cache_XXXXXX")" trap 'rm -rf "${TORCHINDUCTOR_CACHE_DIR}"' EXIT -# Run pytest with coverage -${CONDA_RUN} pytest -n auto --cov=./ --cov-report=xml +# EXPERIMENT: run without xdist entirely so output is unbuffered and each test +# name prints immediately (with -n 1, xdist still buffers all output in a +# worker process, hiding which test is hanging). -v prints test names as they +# start; faulthandler_timeout dumps threads if a single test stalls. +${CONDA_RUN} pytest -p no:xdist -v --cov=./ --cov-report=xml \ + --timeout=1500 --timeout-method=thread \ + -o faulthandler_timeout=180 # Run gtest LLVM_PROFDATA="xcrun llvm-profdata" LLVM_COV="xcrun llvm-cov" \ ${CONDA_RUN} test/run_oss_cpp_tests.sh diff --git a/.github/workflows/_unittest.yml b/.github/workflows/_unittest.yml index 15c87bd79e4..e63a6bc518c 100644 --- a/.github/workflows/_unittest.yml +++ b/.github/workflows/_unittest.yml @@ -49,6 +49,7 @@ jobs: python-version: '3.11' submodules: 'recursive' ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + timeout: 90 script: | set -eux # This is needed to get the prebuilt PyTorch wheel from S3