From 8b9f9666d8fd2853241c81fff88860271a121382 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Thu, 12 Feb 2026 09:14:32 +0100 Subject: [PATCH 1/4] ci: change to uv add to install dependencies with single solve tests: partial fix of tests failing due to anndata-git and pandas >=3 --- .github/workflows/release.yaml | 4 ++-- .github/workflows/test.yaml | 22 ++++++++++------------ .readthedocs.yaml | 29 ++++++++++++++++------------- pyproject.toml | 23 ++++++++++++----------- tests/core/test_centroids.py | 2 +- tests/io/test_readwrite.py | 23 +++++++++++------------ 6 files changed, 52 insertions(+), 51 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 299ed867b..18493bca3 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -9,9 +9,9 @@ jobs: runs-on: ubuntu-latest if: startsWith(github.ref, 'refs/tags/v') steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v6 - name: Set up Python 3.12 - uses: actions/setup-python@v4 + uses: actions/setup-python@v6 with: python-version: "3.12" cache: pip diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 127986287..efd31ef3f 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -13,7 +13,7 @@ jobs: runs-on: ${{ matrix.os }} defaults: run: - shell: bash -e {0} + shell: bash # bash also on windows strategy: fail-fast: false @@ -32,8 +32,8 @@ jobs: PRERELEASE: ${{ matrix.prerelease }} steps: - - uses: actions/checkout@v2 - - uses: astral-sh/setup-uv@v5 + - uses: actions/checkout@v6 + - uses: astral-sh/setup-uv@v7 id: setup-uv with: version: "latest" @@ -41,20 +41,18 @@ jobs: - name: Install dependencies run: | if [[ "${PRERELEASE}" == "allow" ]]; then - uv sync --extra test - : # uv sync --extra test --prerelease ${PRERELEASE} - uv pip install git+https://github.com/scverse/anndata.git - uv pip install --prerelease allow pandas - else - uv sync --extra test + sed -i '' 's/requires-python.*//' 
pyproject.toml # otherwise uv complains that anndata requires python>=3.12 and we only do >=3.11 😱 + uv add git+https://github.com/scverse/anndata.git + uv add pandas>=3.dev0 fi if [[ -n "${DASK_VERSION}" ]]; then if [[ "${DASK_VERSION}" == "latest" ]]; then - uv pip install --upgrade dask + uv add dask else - uv pip install dask==${DASK_VERSION} + uv add dask==${DASK_VERSION} fi fi + uv sync --group=test - name: Test env: MPLBACKEND: agg @@ -63,7 +61,7 @@ jobs: run: | uv run pytest --cov --color=yes --cov-report=xml - name: Upload coverage to Codecov - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: name: coverage verbose: true diff --git a/.readthedocs.yaml b/.readthedocs.yaml index acecf90e6..bea845657 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -1,19 +1,22 @@ # https://docs.readthedocs.io/en/stable/config-file/v2.html version: 2 build: - os: ubuntu-20.04 - tools: - python: "3.11" -sphinx: - configuration: docs/conf.py - fail_on_warning: true -python: - install: - - method: pip - path: . 
- extra_requirements: - - docs - - torch + os: ubuntu-24.04 + tools: + python: '3.13' + jobs: + post_checkout: + # unshallow so version can be derived from tag + - git fetch --unshallow || true + create_environment: + - asdf plugin add uv + - asdf install uv latest + - asdf global uv latest + build: + html: + - uv sync --group=docs --group=torch + - make --directory=docs build + - mv docs/_build $READTHEDOCS_OUTPUT submodules: include: - "docs/tutorials/notebooks" diff --git a/pyproject.toml b/pyproject.toml index 89e9e0235..77b913b91 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,8 +51,17 @@ dependencies = [ "xarray-spatial>=0.3.5", "zarr>=3.0.0", ] - [project.optional-dependencies] +torch = [ + "torch" +] +extra = [ + "napari-spatialdata[all]", + "spatialdata-plot", + "spatialdata-io", +] + +[dependency-groups] dev = [ "bump2version", "sentry-prevent-cli", @@ -80,14 +89,6 @@ benchmark = [ "asv", "memray", ] -torch = [ - "torch" -] -extra = [ - "napari-spatialdata[all]", - "spatialdata-plot", - "spatialdata-io", -] [tool.coverage.run] source = ["spatialdata"] @@ -95,9 +96,9 @@ omit = [ "**/test_*.py", ] -[tool.pytest.ini_options] +[tool.pytest] testpaths = ["tests"] -xfail_strict = true +strict = true addopts = [ # "-Werror", # if 3rd party libs raise DeprecationWarnings, just use filterwarnings below "--import-mode=importlib", # allow using test files with same name diff --git a/tests/core/test_centroids.py b/tests/core/test_centroids.py index aa332f9da..9679c3ff1 100644 --- a/tests/core/test_centroids.py +++ b/tests/core/test_centroids.py @@ -183,7 +183,7 @@ def test_get_centroids_invalid_element(images): region_key="region", instance_key="instance_id", ) - with pytest.raises(ValueError, match="The object type is not supported."): + with pytest.raises(ValueError, match=r"The object type is not supported"): get_centroids(adata) diff --git a/tests/io/test_readwrite.py b/tests/io/test_readwrite.py index af028d29c..9d191f0d9 100644 --- 
a/tests/io/test_readwrite.py +++ b/tests/io/test_readwrite.py @@ -1067,7 +1067,7 @@ def test_read_sdata(tmp_path: Path, points: SpatialData) -> None: assert_spatial_data_objects_are_identical(sdata_from_path, sdata_from_zarr_group) -def test_sdata_with_nan_in_obs() -> None: +def test_sdata_with_nan_in_obs(tmp_path: Path) -> None: """Test writing SpatialData with mixed string/NaN values in obs works correctly. Regression test for https://github.com/scverse/spatialdata/issues/399 @@ -1096,14 +1096,13 @@ def test_sdata_with_nan_in_obs() -> None: assert sdata["table"].obs["column_only_region1"].iloc[1] is np.nan assert np.isnan(sdata["table"].obs["column_only_region2"].iloc[0]) - with tempfile.TemporaryDirectory() as tmpdir: - path = os.path.join(tmpdir, "data.zarr") - sdata.write(path) - - sdata2 = SpatialData.read(path) - assert "column_only_region1" in sdata2["table"].obs.columns - assert sdata2["table"].obs["column_only_region1"].iloc[0] == "string" - assert sdata2["table"].obs["column_only_region2"].iloc[1] == 3 - # After round-trip, NaN in object-dtype column becomes string "nan" - assert sdata2["table"].obs["column_only_region1"].iloc[1] == "nan" - assert np.isnan(sdata2["table"].obs["column_only_region2"].iloc[0]) + path = tmp_path / "data.zarr" + sdata.write(path) + + sdata2 = SpatialData.read(path) + assert "column_only_region1" in sdata2["table"].obs.columns + assert sdata2["table"].obs["column_only_region1"].iloc[0] == "string" + assert sdata2["table"].obs["column_only_region2"].iloc[1] == 3 + # After round-trip, NaN in object-dtype column becomes string "nan" + assert sdata2["table"].obs["column_only_region1"].iloc[1] == "nan" + assert np.isnan(sdata2["table"].obs["column_only_region2"].iloc[0]) From 72912d1f892382fbded3c9dd9fc29937cc835386 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Thu, 12 Feb 2026 09:23:47 +0100 Subject: [PATCH 2/4] test: fix remainder tests failing due to anndata-git, pandas>=3 and dask 2025.2.0. 
fix tests test: adjust test to account for dask issues --- tests/io/test_pyramids_performance.py | 6 +++++- tests/io/test_readwrite.py | 16 +++++++++++----- tests/models/test_models.py | 13 +++++++------ 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/tests/io/test_pyramids_performance.py b/tests/io/test_pyramids_performance.py index 875879541..581bf6833 100644 --- a/tests/io/test_pyramids_performance.py +++ b/tests/io/test_pyramids_performance.py @@ -88,4 +88,8 @@ def test_write_image_multiscale_performance(sdata_with_image: SpatialData, tmp_p num_chunks_all_scales.item(), num_chunks_all_scales.item() + 1, } - assert actual_num_chunk_reads == num_chunks_scale0.item() + # We set a range here as with certain dask versions more reads occur. This checks whether the range is still + # acceptable; if not, we can check whether it is due to SpatialData or Dask and act accordingly. + # In addition, we could use a mock side effect to check that the entry points from within spatialdata are within + # the expected range. 
+ assert actual_num_chunk_reads in range(0, num_chunks_scale0.item() * 2 + 1) diff --git a/tests/io/test_readwrite.py b/tests/io/test_readwrite.py index 9d191f0d9..e6d23eee2 100644 --- a/tests/io/test_readwrite.py +++ b/tests/io/test_readwrite.py @@ -13,6 +13,7 @@ import zarr from anndata import AnnData from numpy.random import default_rng +from packaging.version import Version from shapely import MultiPolygon, Polygon from upath import UPath from zarr.errors import GroupNotFoundError @@ -1101,8 +1102,13 @@ def test_sdata_with_nan_in_obs(tmp_path: Path) -> None: sdata2 = SpatialData.read(path) assert "column_only_region1" in sdata2["table"].obs.columns - assert sdata2["table"].obs["column_only_region1"].iloc[0] == "string" - assert sdata2["table"].obs["column_only_region2"].iloc[1] == 3 - # After round-trip, NaN in object-dtype column becomes string "nan" - assert sdata2["table"].obs["column_only_region1"].iloc[1] == "nan" - assert np.isnan(sdata2["table"].obs["column_only_region2"].iloc[0]) + r1 = sdata2["table"].obs["column_only_region1"] + r2 = sdata2["table"].obs["column_only_region2"] + + assert r1.iloc[0] == "string" + assert r2.iloc[1] == 3 + if Version(pd.__version__) >= Version("3"): + assert pd.isna(r1.iloc[1]) + else: # After round-trip, NaN in object-dtype column becomes string "nan" on pandas 2 + assert r1.iloc[1] == "nan" + assert np.isnan(r2.iloc[0]) diff --git a/tests/models/test_models.py b/tests/models/test_models.py index c4ac3347f..7382a9a3b 100644 --- a/tests/models/test_models.py +++ b/tests/models/test_models.py @@ -18,6 +18,7 @@ from dask.dataframe import DataFrame as DaskDataFrame from geopandas import GeoDataFrame from numpy.random import default_rng +from packaging.version import Version from shapely.geometry import MultiPolygon, Point, Polygon from shapely.io import to_ragged_array from spatial_image import to_spatial_image @@ -311,7 +312,7 @@ def test_shapes_model(self, model: ShapesModel, path: Path) -> None: 
@pytest.mark.parametrize("model", [PointsModel]) @pytest.mark.parametrize("instance_key", [None, "cell_id"]) @pytest.mark.parametrize("feature_key", [None, "target"]) - @pytest.mark.parametrize("typ", [np.ndarray, pd.DataFrame, dd.DataFrame]) + @pytest.mark.parametrize("typ", [np.ndarray, pd.DataFrame, dd.DataFrame], ids=["numpy", "pandas", "dask"]) @pytest.mark.parametrize("is_annotation", [True, False]) @pytest.mark.parametrize("is_3d", [True, False]) @pytest.mark.parametrize("coordinates", [None, {"x": "A", "y": "B", "z": "C"}]) @@ -937,12 +938,12 @@ def test_categories_on_partitioned_dataframe(sdata_blobs: SpatialData): assert np.array_equal(df["genes"].to_numpy(), ddf_parsed["genes"].compute().to_numpy()) assert set(df["genes"].cat.categories.tolist()) == set(ddf_parsed["genes"].compute().cat.categories.tolist()) - # two behavior to investigate later/report to dask (they originate in dask) - # TODO: df['genes'].cat.categories has dtype 'object', while ddf_parsed['genes'].compute().cat.categories has dtype - # 'string' - # this problem should disappear after pandas 3.0 is released - assert df["genes"].cat.categories.dtype == "object" + if Version(pd.__version__) >= Version("3"): + assert df["genes"].cat.categories.dtype == "string" + else: + assert df["genes"].cat.categories.dtype == "object" assert ddf_parsed["genes"].compute().cat.categories.dtype == "string" + # behavior to investigate later/report to dask # TODO: the list of categories are not preserving the order assert df["genes"].cat.categories.tolist() != ddf_parsed["genes"].compute().cat.categories.tolist() From 4a3d029b050cd058ad0f3711e8276bb96a6dc22e Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Thu, 12 Feb 2026 09:26:36 +0100 Subject: [PATCH 3/4] test: add array_api marker to be ignored. 
--- pyproject.toml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 77b913b91..fb06b8611 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -100,7 +100,6 @@ omit = [ testpaths = ["tests"] strict = true addopts = [ -# "-Werror", # if 3rd party libs raise DeprecationWarnings, just use filterwarnings below "--import-mode=importlib", # allow using test files with same name "-s", # print output from tests ] @@ -108,11 +107,13 @@ addopts = [ markers = [ "slow: marks tests as slow (deselect with '-m \"not slow\"')", "gpu: run test on GPU using CuPY.", + "array_api: used by anndata.tests.helpers, not us", "skip_with_pyarrow_strings: skipwhen pyarrow string conversion is turned on", ] # info on how to use this https://stackoverflow.com/questions/57925071/how-do-i-avoid-getting-deprecationwarning-from-inside-dependencies-with-pytest filterwarnings = [ - # "ignore:.*U.*mode is deprecated:DeprecationWarning", + # "error", # if 3rd party libs raise DeprecationWarnings, TODO: filter them individually below + # "ignore:.*U.*mode is deprecated:DeprecationWarning", ] [tool.jupytext] From a13ac1124bb2b562ae2af2a8f4807d5beed33c0d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 12 Feb 2026 08:14:58 +0000 Subject: [PATCH 4/4] docs_build: use uv instead of make for docs build fix docs attempt fix docs attempt 2 fix docs --- .readthedocs.yaml | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index bea845657..d49a2c156 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -1,22 +1,22 @@ # https://docs.readthedocs.io/en/stable/config-file/v2.html version: 2 build: - os: ubuntu-24.04 - tools: - python: '3.13' - jobs: - post_checkout: - # unshallow so version can be derived from tag - - git fetch --unshallow || true - create_environment: - - asdf plugin add uv - - asdf 
install uv latest - - asdf global uv latest - build: - html: - - uv sync --group=docs --group=torch - - make --directory=docs build - - mv docs/_build $READTHEDOCS_OUTPUT + os: ubuntu-24.04 + tools: + python: "3.13" + jobs: + post_checkout: + # unshallow so version can be derived from tag + - git fetch --unshallow || true + create_environment: + - asdf plugin add uv + - asdf install uv latest + - asdf global uv latest + build: + html: + - uv sync --group=docs --extra=torch + - uv run make --directory=docs html + - mv docs/_build $READTHEDOCS_OUTPUT submodules: include: - "docs/tutorials/notebooks"