Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions src/zarr/core/chunk_grids.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from typing import TYPE_CHECKING, Any, Literal, Self, TypeAlias, TypedDict, Union, cast

import numpy as np
import numpy.typing as npt

import zarr
from zarr.abc.metadata import Metadata
Expand Down Expand Up @@ -427,6 +428,28 @@ def array_index_to_chunk_coord(
Coordinates of the chunk containing the array index.
"""

@abstractmethod
def array_indices_to_chunk_dim(
self, array_shape: tuple[int, ...], dim: int, indices: npt.NDArray[np.intp]
) -> npt.NDArray[np.intp]:
"""
Map an array of indices along one dimension to chunk coordinates (vectorized).

Parameters
----------
array_shape : tuple[int, ...]
Shape of the full array.
dim : int
Dimension index.
indices : np.ndarray
Array of indices along the given dimension.

Returns
-------
np.ndarray
Array of chunk coordinates, same shape as indices.
"""

@abstractmethod
def chunks_per_dim(self, array_shape: tuple[int, ...], dim: int) -> int:
"""
Expand Down Expand Up @@ -534,6 +557,19 @@ def array_index_to_chunk_coord(
for idx, size in zip(array_index, self.chunk_shape, strict=False)
)

def array_indices_to_chunk_dim(
self, array_shape: tuple[int, ...], dim: int, indices: npt.NDArray[np.intp]
) -> npt.NDArray[np.intp]:
"""
Vectorized mapping of array indices to chunk coordinates along one dimension.

For RegularChunkGrid, this is simply indices // chunk_size.
"""
chunk_size = self.chunk_shape[dim]
if chunk_size == 0:
return np.zeros_like(indices)
return indices // chunk_size

def chunks_per_dim(self, array_shape: tuple[int, ...], dim: int) -> int:
"""
Get the number of chunks along a specific dimension.
Expand Down Expand Up @@ -1071,6 +1107,17 @@ def array_index_to_chunk_coord(

return tuple(result)

def array_indices_to_chunk_dim(
self, array_shape: tuple[int, ...], dim: int, indices: npt.NDArray[np.intp]
) -> npt.NDArray[np.intp]:
"""
Vectorized mapping of array indices to chunk coordinates along one dimension.

For RectilinearChunkGrid, uses np.searchsorted on cumulative sizes.
"""
cumsum = np.asarray(self._cumulative_sizes[dim])
return np.searchsorted(cumsum, indices, side="right").astype(np.intp) - 1

def chunks_in_selection(
self, array_shape: tuple[int, ...], selection: tuple[slice, ...]
) -> Iterator[tuple[int, ...]]:
Expand Down
24 changes: 5 additions & 19 deletions src/zarr/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -864,12 +864,7 @@ def __init__(
boundscheck_indices(dim_sel, dim_len)

# determine which chunk is needed for each selection item
# Use chunk grid to map each index to its chunk coordinate
dim_sel_chunk = np.empty(len(dim_sel), dtype=np.intp)
for i, idx in enumerate(dim_sel):
full_index = tuple(int(idx) if j == dim else 0 for j in range(len(array_shape)))
chunk_coords = chunk_grid.array_index_to_chunk_coord(array_shape, full_index)
dim_sel_chunk[i] = chunk_coords[dim]
dim_sel_chunk = chunk_grid.array_indices_to_chunk_dim(array_shape, dim, dim_sel)

# determine order of indices
if order == Order.UNKNOWN:
Expand Down Expand Up @@ -1315,19 +1310,10 @@ def __init__(
selection_broadcast = tuple(dim_sel.reshape(-1) for dim_sel in selection_broadcast)

# compute chunk index for each point in the selection using chunk grid
# For each point, we need to find which chunk it belongs to
npoints = selection_broadcast[0].size
chunks_multi_index_list = []
for dim in range(len(shape)):
dim_chunk_indices = np.empty(npoints, dtype=np.intp)
for i in range(npoints):
# Build full coordinate for this point
point_coords = tuple(int(selection_broadcast[d][i]) for d in range(len(shape)))
# Map to chunk coordinates
chunk_coords = chunk_grid.array_index_to_chunk_coord(shape, point_coords)
dim_chunk_indices[i] = chunk_coords[dim]
chunks_multi_index_list.append(dim_chunk_indices)
chunks_multi_index_broadcast = tuple(chunks_multi_index_list)
chunks_multi_index_broadcast = tuple(
chunk_grid.array_indices_to_chunk_dim(shape, dim, selection_broadcast[dim])
for dim in range(len(shape))
)

# ravel chunk indices
chunks_raveled_indices = np.ravel_multi_index(
Expand Down
49 changes: 49 additions & 0 deletions tests/test_chunk_grids/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,3 +95,52 @@ def test_normalize_chunks_dask_style_irregular_multiple_dims() -> None:

# Should use first chunk size from each dimension
assert result == (10, 20)


# =============================================================================
# Tests for _is_nested_sequence()
# =============================================================================


def test_is_nested_sequence_basic() -> None:
"""Test _is_nested_sequence with typical inputs."""
from zarr.core.chunk_grids import _is_nested_sequence

# Nested sequences → True
assert _is_nested_sequence([[10, 20], [5, 5]]) is True
assert _is_nested_sequence([(10, 20), (5, 5)]) is True
assert _is_nested_sequence(([10, 20], [5, 5])) is True

# Flat sequences → False
assert _is_nested_sequence((10, 10)) is False
assert _is_nested_sequence([10, 10]) is False


def test_is_nested_sequence_non_sequences() -> None:
"""Test _is_nested_sequence with non-sequence types."""
from zarr.core.chunk_grids import _is_nested_sequence

assert _is_nested_sequence(10) is False
assert _is_nested_sequence("auto") is False
assert _is_nested_sequence(None) is False
assert _is_nested_sequence(3.14) is False


def test_is_nested_sequence_chunk_grid_instance() -> None:
"""Test _is_nested_sequence with ChunkGrid instances."""
from zarr.core.chunk_grids import RectilinearChunkGrid, RegularChunkGrid, _is_nested_sequence

assert _is_nested_sequence(RegularChunkGrid(chunk_shape=(10, 10))) is False
assert _is_nested_sequence(RectilinearChunkGrid(chunk_shapes=[[10, 20], [5, 5]])) is False


def test_is_nested_sequence_empty_iterables() -> None:
"""Test _is_nested_sequence with empty iterables.

Empty sequences return False because there's no first element to inspect.
"""
from zarr.core.chunk_grids import _is_nested_sequence

assert _is_nested_sequence([]) is False
assert _is_nested_sequence(()) is False
assert _is_nested_sequence([[]]) is True # outer has one element which is a list
139 changes: 139 additions & 0 deletions tests/test_chunk_grids/test_rectilinear.py
Original file line number Diff line number Diff line change
Expand Up @@ -1804,3 +1804,142 @@ def test_invalid_chunk_coordinates_raise() -> None:

with pytest.raises(IndexError):
grid.get_chunk_start(array_shape, (0, 2)) # Only 2 chunks in dim 1 (0,1)


# ===================================================================
# End-to-end indexing tests through full zarr Array pipeline
# (SliceDimIndexer, IntDimIndexer, etc. with rectilinear grids)
# ===================================================================


def test_slice_at_exact_chunk_boundaries() -> None:
"""Test slicing exactly at rectilinear chunk boundaries.

This exercises the SliceDimIndexer code path end-to-end, verifying
that the refactored boundary calculation (stop-1 → +1 pattern) is correct.
Chunk boundaries for rows: [0:10, 10:30, 30:60]
Chunk boundaries for cols: [0:25, 25:50, 50:75, 75:100]
"""
store = MemoryStore()
data = np.arange(6000, dtype="float64").reshape(60, 100)
arr = zarr.create_array(
store=store,
shape=(60, 100),
chunks=[[10, 20, 30], [25, 25, 25, 25]],
dtype="float64",
zarr_format=3,
)
arr[:] = data

# Slice exactly one chunk (second row-chunk, first col-chunk)
np.testing.assert_array_equal(arr[10:30, 0:25], data[10:30, 0:25])

# Slice exactly at a boundary start
np.testing.assert_array_equal(arr[30:60, 75:100], data[30:60, 75:100])

# Slice spanning exactly two chunk boundaries
np.testing.assert_array_equal(arr[0:30, 0:50], data[0:30, 0:50])
np.testing.assert_array_equal(arr[10:60, 25:75], data[10:60, 25:75])

# Single-element slices at each boundary
np.testing.assert_array_equal(arr[9:10, :], data[9:10, :]) # last row of chunk 0
np.testing.assert_array_equal(arr[10:11, :], data[10:11, :]) # first row of chunk 1
np.testing.assert_array_equal(arr[29:30, :], data[29:30, :]) # last row of chunk 1
np.testing.assert_array_equal(arr[30:31, :], data[30:31, :]) # first row of chunk 2


def test_strided_slicing_rectilinear() -> None:
"""Test slicing with step > 1 on rectilinear arrays."""
store = MemoryStore()
data = np.arange(6000, dtype="float64").reshape(60, 100)
arr = zarr.create_array(
store=store,
shape=(60, 100),
chunks=[[10, 20, 30], [25, 25, 25, 25]],
dtype="float64",
zarr_format=3,
)
arr[:] = data

np.testing.assert_array_equal(arr[::2, ::3], data[::2, ::3])
np.testing.assert_array_equal(arr[5:55:5, 10:90:10], data[5:55:5, 10:90:10])
np.testing.assert_array_equal(arr[::7, ::13], data[::7, ::13])


def test_fancy_indexing_rectilinear() -> None:
"""Test oindex and vindex on rectilinear arrays through the full Array pipeline."""
store = MemoryStore()
data = np.arange(6000, dtype="float64").reshape(60, 100)
arr = zarr.create_array(
store=store,
shape=(60, 100),
chunks=[[10, 20, 30], [25, 25, 25, 25]],
dtype="float64",
zarr_format=3,
)
arr[:] = data

# oindex: orthogonal indexing with integer arrays spanning multiple chunks
rows = np.array([0, 9, 10, 29, 30, 59])
cols = np.array([0, 24, 25, 49, 50, 99])
np.testing.assert_array_equal(arr.oindex[rows, cols], data[np.ix_(rows, cols)])

# vindex: coordinate-based indexing
r = np.array([0, 10, 30, 59])
c = np.array([0, 25, 50, 99])
np.testing.assert_array_equal(arr.vindex[r, c], data[r, c])

# oindex with slice on one axis, int array on other
np.testing.assert_array_equal(arr.oindex[rows, 50:75], data[np.ix_(rows, np.arange(50, 75))])
np.testing.assert_array_equal(arr.oindex[10:30, cols], data[np.ix_(np.arange(10, 30), cols)])


def test_boolean_mask_indexing_rectilinear() -> None:
"""Test boolean mask indexing on rectilinear arrays."""
store = MemoryStore()
data = np.arange(6000, dtype="float64").reshape(60, 100)
arr = zarr.create_array(
store=store,
shape=(60, 100),
chunks=[[10, 20, 30], [25, 25, 25, 25]],
dtype="float64",
zarr_format=3,
)
arr[:] = data

# Full 2D boolean mask
mask = data > 3000
np.testing.assert_array_equal(arr.vindex[mask], data[mask])

# 1D boolean mask on rows via oindex
row_mask = np.zeros(60, dtype=bool)
row_mask[[0, 9, 10, 29, 30, 59]] = True # hits all chunk boundaries
np.testing.assert_array_equal(arr.oindex[row_mask, :], data[row_mask, :])

# 1D boolean mask on cols via oindex
col_mask = np.zeros(100, dtype=bool)
col_mask[[0, 24, 25, 49, 50, 99]] = True # hits all chunk boundaries
np.testing.assert_array_equal(arr.oindex[:, col_mask], data[:, col_mask])


def test_block_indexing_rectilinear() -> None:
"""Test block (chunk-level) indexing on rectilinear arrays."""
store = MemoryStore()
data = np.arange(6000, dtype="float64").reshape(60, 100)
arr = zarr.create_array(
store=store,
shape=(60, 100),
chunks=[[10, 20, 30], [25, 25, 25, 25]],
dtype="float64",
zarr_format=3,
)
arr[:] = data

# Individual blocks
np.testing.assert_array_equal(arr.blocks[0, 0], data[0:10, 0:25])
np.testing.assert_array_equal(arr.blocks[1, 0], data[10:30, 0:25])
np.testing.assert_array_equal(arr.blocks[2, 3], data[30:60, 75:100])

# Block range
np.testing.assert_array_equal(arr.blocks[0:2, 0:2], data[0:30, 0:50])
np.testing.assert_array_equal(arr.blocks[1:3, 2:4], data[10:60, 50:100])
Loading