From 267a6c4511180bb0d24ca44d3a186af4a0e9a6e4 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Fri, 20 Feb 2026 13:45:33 -0500 Subject: [PATCH 1/4] tests: Add non-power-of-2 shard shapes to benchmarks Add (30,30,30) to large_morton_shards and (10,10,10), (20,20,20), (30,30,30) to morton_iter_shapes to benchmark the scalar fallback path for non-power-of-2 shapes, which are not fully covered by the vectorized hypercube path. Co-Authored-By: Claude Sonnet 4.6 --- tests/benchmarks/test_indexing.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/benchmarks/test_indexing.py b/tests/benchmarks/test_indexing.py index d30d731f0f..57159076a6 100644 --- a/tests/benchmarks/test_indexing.py +++ b/tests/benchmarks/test_indexing.py @@ -106,7 +106,8 @@ def read_with_cache_clear() -> None: # Benchmark with larger chunks_per_shard to make Morton order impact more visible large_morton_shards = ( - (32,) * 3, # With 1x1x1 chunks: 32x32x32 = 32768 chunks per shard + (32,) * 3, # With 1x1x1 chunks: 32x32x32 = 32768 chunks per shard (power-of-2) + (30,) * 3, # With 1x1x1 chunks: 30x30x30 = 27000 chunks per shard (non-power-of-2) ) @@ -197,9 +198,12 @@ def read_with_cache_clear() -> None: # Benchmark for morton_order_iter directly (no I/O) morton_iter_shapes = ( - (8, 8, 8), # 512 elements - (16, 16, 16), # 4096 elements - (32, 32, 32), # 32768 elements + (8, 8, 8), # 512 elements (power-of-2) + (10, 10, 10), # 1000 elements (non-power-of-2) + (16, 16, 16), # 4096 elements (power-of-2) + (20, 20, 20), # 8000 elements (non-power-of-2) + (32, 32, 32), # 32768 elements (power-of-2) + (30, 30, 30), # 27000 elements (non-power-of-2) ) From 1dfd71dc1a2019eb0f90b521855ab6e83f2a9a0a Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Fri, 20 Feb 2026 13:55:49 -0500 Subject: [PATCH 2/4] tests: Add near-miss power-of-2 shape (33,33,33) to benchmarks Documents the performance penalty when a shard shape is just above a power-of-2 boundary, causing n_z to jump from 32,768 to 262,144. Co-Authored-By: Claude Sonnet 4.6 --- tests/benchmarks/test_indexing.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/benchmarks/test_indexing.py b/tests/benchmarks/test_indexing.py index 57159076a6..76278da7dd 100644 --- a/tests/benchmarks/test_indexing.py +++ b/tests/benchmarks/test_indexing.py @@ -108,6 +108,7 @@ def read_with_cache_clear() -> None: large_morton_shards = ( (32,) * 3, # With 1x1x1 chunks: 32x32x32 = 32768 chunks per shard (power-of-2) (30,) * 3, # With 1x1x1 chunks: 30x30x30 = 27000 chunks per shard (non-power-of-2) + (33,) * 3, # With 1x1x1 chunks: 33x33x33 = 35937 chunks per shard (near-miss: just above power-of-2) ) @@ -204,6 +205,7 @@ def read_with_cache_clear() -> None: (20, 20, 20), # 8000 elements (non-power-of-2) (32, 32, 32), # 32768 elements (power-of-2) (30, 30, 30), # 27000 elements (non-power-of-2) + (33, 33, 33), # 35937 elements (near-miss: just above power-of-2, n_z=262144) ) From 403c50b6275c6d5502f1cb367ecce21d358a6fc4 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Fri, 20 Feb 2026 16:48:53 -0500 Subject: [PATCH 3/4] style: Apply ruff format to benchmark file Co-Authored-By: Claude Sonnet 4.6 --- tests/benchmarks/test_indexing.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tests/benchmarks/test_indexing.py b/tests/benchmarks/test_indexing.py index 76278da7dd..385a85b5b5 100644 --- a/tests/benchmarks/test_indexing.py +++ b/tests/benchmarks/test_indexing.py @@ -108,7 +108,8 @@ def read_with_cache_clear() -> None: large_morton_shards = ( (32,) * 3, # With 1x1x1 chunks: 32x32x32 = 32768 chunks per shard (power-of-2) (30,) * 3, # With 1x1x1 chunks: 30x30x30 = 27000 chunks per shard (non-power-of-2) - (33,) * 3, # With 1x1x1 chunks: 33x33x33 = 35937 chunks per shard (near-miss: just above power-of-2) + (33,) + * 3, # With 1x1x1 chunks: 33x33x33 = 35937 chunks per shard (near-miss: just above power-of-2) ) @@ -199,13 +200,13 @@ def read_with_cache_clear() -> None: # Benchmark for morton_order_iter directly (no I/O) morton_iter_shapes = ( - (8, 8, 8), # 512 elements (power-of-2) - (10, 10, 10), # 1000 elements (non-power-of-2) - (16, 16, 16), # 4096 elements (power-of-2) - (20, 20, 20), # 8000 elements (non-power-of-2) - (32, 32, 32), # 32768 elements (power-of-2) - (30, 30, 30), # 27000 elements (non-power-of-2) - (33, 33, 33), # 35937 elements (near-miss: just above power-of-2, n_z=262144) + (8, 8, 8), # 512 elements (power-of-2) + (10, 10, 10), # 1000 elements (non-power-of-2) + (16, 16, 16), # 4096 elements (power-of-2) + (20, 20, 20), # 8000 elements (non-power-of-2) + (32, 32, 32), # 32768 elements (power-of-2) + (30, 30, 30), # 27000 elements (non-power-of-2) + (33, 33, 33), # 35937 elements (near-miss: just above power-of-2, n_z=262144) ) From ffa30657981af2364daa76fd895577b82975256c Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Fri, 20 Feb 2026 19:25:40 -0500 Subject: [PATCH 4/4] changes: Add changelog entry for PR #3717 Co-Authored-By: Claude Sonnet 4.6 --- changes/3717.misc.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/3717.misc.md diff --git a/changes/3717.misc.md b/changes/3717.misc.md new file mode 100644 index 0000000000..5fed76b2b7 --- /dev/null +++ b/changes/3717.misc.md @@ -0,0 +1 @@ +Add benchmarks for Morton order computation with non-power-of-2 and near-miss shard shapes, covering both pure computation and end-to-end read/write performance.