From 7c6979ef368da7f12ec93d85930024d094238f26 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sat, 20 Jun 2026 10:26:43 -0700 Subject: [PATCH 1/2] Propagate input dims/coords through true_color() (#3429) true_color() hardcoded output dims to ['y','x','band'] and rebuilt coords via r['y']/r['x'], so it raised KeyError on lat/lon (or row/col) rasters and dropped non-spatial coords like spatial_ref. Derive the two spatial dims and coords from the input instead, then append the band dim, matching how the other indices in the module preserve metadata. Verified on numpy, cupy, dask+numpy, dask+cupy. --- xrspatial/multispectral.py | 12 ++++----- xrspatial/tests/test_multispectral.py | 36 +++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/xrspatial/multispectral.py b/xrspatial/multispectral.py index 3a177af2d..d3efc4143 100644 --- a/xrspatial/multispectral.py +++ b/xrspatial/multispectral.py @@ -1821,11 +1821,11 @@ def true_color(r, g, b, nodata=1, c=10.0, th=0.125, name='true_color'): warnings.simplefilter('ignore') out = mapper(r)(r, g, b, nodata, c, th) - # TODO: output metadata: coords, dims, attrs - _dims = ['y', 'x', 'band'] - _attrs = r.attrs - _coords = {'y': r['y'], - 'x': r['x'], + # Preserve the input's spatial dims/coords instead of hardcoding y/x, + # then append the band dim. Hardcoding raised KeyError on lat/lon + # rasters and dropped extra coords like spatial_ref (issue #3429). + _dims = [*r.dims, 'band'] + _coords = {**{name: r[name] for name in r.coords}, 'band': [0, 1, 2, 3]} return DataArray( @@ -1833,7 +1833,7 @@ def true_color(r, g, b, nodata=1, c=10.0, th=0.125, name='true_color'): name=name, dims=_dims, coords=_coords, - attrs=_attrs, + attrs=r.attrs, ) diff --git a/xrspatial/tests/test_multispectral.py b/xrspatial/tests/test_multispectral.py index f83abc2a8..be18012ba 100644 --- a/xrspatial/tests/test_multispectral.py +++ b/xrspatial/tests/test_multispectral.py @@ -884,6 +884,42 @@ def test_true_color_mismatched_backends_raises(): true_color(red, green, blue) +# true_color metadata propagation (issue #3429) ---------- +def _tc_band(backend): + data = np.random.default_rng(3429).random((6, 6)).astype(np.float32) + return create_test_raster( + data, backend=backend, dims=['lat', 'lon'], + attrs={'res': (0.5, 0.5), 'crs': 'EPSG: 5070'}, + ) + + +@pytest.mark.parametrize( + "backend", + ["numpy", + pytest.param("dask+numpy", marks=dask_array_available), + pytest.param("cupy", marks=cuda_and_cupy_available), + pytest.param("dask+cupy", marks=cuda_and_cupy_available)], +) +def test_true_color_preserves_non_yx_dims(backend): + # true_color used to hardcode y/x and raised KeyError on lat/lon input. + r = _tc_band(backend) + g = _tc_band(backend) + b = _tc_band(backend) + out = true_color(r, g, b) + assert out.dims == ('lat', 'lon', 'band') + np.testing.assert_allclose(out['lat'].data, r['lat'].data) + np.testing.assert_allclose(out['lon'].data, r['lon'].data) + assert out.attrs == r.attrs + + +def test_true_color_preserves_extra_coords(): + # A non-spatial coord (e.g. rioxarray's spatial_ref) must pass through. + r = _tc_band('numpy').assign_coords(spatial_ref=0) + out = true_color(r, r.copy(), r.copy()) + assert 'spatial_ref' in out.coords + assert int(out['spatial_ref']) == 0 + + # NDSI ---------- @pytest.fixture def expected_ndsi(): From c7fe7cf9d954fc3810f5d4c37a69bbeadf2d3724 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sat, 20 Jun 2026 10:28:53 -0700 Subject: [PATCH 2/2] sweep-metadata: record multispectral audit (#3429, PR #3434) --- .claude/sweep-metadata-state.csv | 1 + 1 file changed, 1 insertion(+) diff --git a/.claude/sweep-metadata-state.csv b/.claude/sweep-metadata-state.csv index f866c1283..4ad93347f 100644 --- a/.claude/sweep-metadata-state.csv +++ b/.claude/sweep-metadata-state.csv @@ -6,6 +6,7 @@ focal,2026-06-10,3217,MEDIUM,4;5,"Re-audited 2026-06-10 (agent-ad0d55a894c6abc60 geotiff,2026-06-09,3116,HIGH,2;3,"Re-audited 2026-06-09 (agent-ae89ff94a64e3ee8f worktree, branch deep-sweep-metadata-geotiff-2026-06-09). CUDA available; all 4 backends (numpy/cupy/dask+numpy/dask+cupy) run live. Focus: surfaces changed since the 2026-05-18 audit (unpack rename + GPU/dask+GPU support #3075, pack=True #3065/#3079, masked int->float promotion #2994, bbox= reads, rioxarray param alignment #2963, no-georef VRT coord synthesis #2824, GeoTransform omission #2971). Live probes: unpack attrs (scale_factor/add_offset/mask_and_scale_dtype/nodata/masked_nodata), masked=True promotion, default masked=False, bbox window+transform shift, multi-band band=N, dims/name/coords (incl. coord dtype) all identical across the 4 backends; nodata_pixels_present absent on dask paths is the documented lazy contract, not a bug. pack->unpack round trips verified on numpy/dask/gpu-write; pack of a cupy-backed read raises via the known cupy+xarray xp.astype incompat (see memory cupy_where_astype_incompat; dependency-pin fix, raises loudly, not a metadata bug). VRT reads (full/masked/window/bbox) and no-georef TIFF reads agree across the 4 backends. NEW HIGH finding #3116 (Cat 2+3): to_geotiff(non_georef_da, out.vrt, tile_size=N) wrote a corrupt index for arrays spanning >1 tile -- write_vrt derives placement from each source GeoTransform and non-georef tiles all carry the identity transform, so rasterX/YSize collapsed to one tile and every DstRect landed at the origin; reads silently returned a single tile (24x32 in -> 16x16 out). Gap left by #2966/#2971 (tests only covered one non-georef source). Fix: _write_vrt_tiled threads per-tile pixel offsets through _build_vrt -> write_vrt via internal dst_offsets kwarg; write_vrt refuses >1 all-non-georef sources without explicit placement and rejects dst_offsets alongside georeferenced sources. 18 new tests in tests/vrt/test_non_georef_placement_3116.py incl. 4-backend round trip, dask-backed and plain-ndarray writes, XML DstRect assertions, georef placement regression, and the write_vrt error contract. Full vrt suite 520 passed; write+round-trip suites 1292 passed." interpolate,2026-06-12,3288,MEDIUM,5,kriging K_inv-None fallback was numpy-backed on all backends and misnamed the variance raster; fixed via #3288. All 4 backends verified end-to-end on GPU host. LOW (documented only): template nodatavals/_FillValue copied verbatim while fill_value is the actual output sentinel; tests codify attrs==template.attrs mcda,2026-06-10,3147,HIGH,1,"constrain() dropped all attrs (res/crs/nodatavals) whenever exclude non-empty (xr.where takes attrs from scalar fill); fixed via attrs restore, tests for numpy/dask/dask+cupy. All other mcda funcs keep attrs/coords/dims on all 4 backends. Out-of-scope crashes noted for backend-parity: owa broken on cupy (numpy order-weights x cupy) and on dask (da.sort does not exist); sensitivity monte_carlo crashes on cupy/dask+cupy (.values on cupy); xr.where compute on cupy/dask+cupy hits known cupy13.6/xarray2025.12 incompat." +multispectral,2026-06-20,3429,MEDIUM,2;3,"true_color() hardcoded y/x dims + dropped extra coords; fixed PR #3434 (all 4 backends verified, CUDA available)" polygonize,2026-06-12,3293,MEDIUM,1,"Audited 2026-06-12 (agent-a86d90abea41b04cf worktree, branch deep-sweep-metadata-polygonize-2026-06-12). CUDA available; all 4 backends (numpy/cupy/dask+numpy/dask+cupy) run live for int, float+NaN, and no-georef rasters. polygonize returns vector output (numpy/awkward/geopandas/spatialpandas/geojson), not a DataArray, so Cats 2-4 reinterpreted as transform/CRS/value-dtype propagation. Transform auto-detect (attrs['transform'] -> rio.transform() -> x/y coords, #2536/#2607) and CRS resolution run in public polygonize() before dispatch, so all 4 backends emit identical columns, bounds, and CRS (verified live). Column value dtype follows input dtype on every backend. NEW MEDIUM finding #3293 (Cat 1): _detect_raster_crs ignored the _xrspatial_no_georef marker that _detect_raster_transform honours, so a geotiff-reader crs_only raster (attrs carry both crs and the marker; metadata_to_attrs writes crs independent of has_georef) produced a GeoDataFrame claiming EPSG:#### over pixel-space geometries -- the #2536 metadata-lies-about-the-data mismatch through the marker channel. contour.py imports the same helper and inherits the fix. Fix: early return None in _detect_raster_crs on the marker + docstring note; 2 new tests in TestPolygonizeCRSPropagation. polygonize+contour suites 274 passed; all 9 auxiliary polygonize test files 303 passed. rotated-read path unaffected (reader drops CRS there). No CRITICAL/HIGH/LOW findings." proximity,2026-05-29,2723,MEDIUM,4;5,"Audited 2026-05-29 (agent-a61dbadc2452a2003 worktree, branch deep-sweep-metadata-proximity-2026-05-29). CUDA+cupy available; all 4 backends (numpy/cupy/dask+numpy/dask+cupy) run live end-to-end for proximity/allocation/direction, both bounded (finite max_distance) and unbounded. Cat 1 (attrs res/crs/transform/nodatavals/_FillValue), Cat 2 (coords + coord dtype), and Cat 3 (dims) all preserved and identical across the 4 backends -- public funcs wrap with xr.DataArray(coords=raster.coords, dims=raster.dims, attrs=raster.attrs). NEW MEDIUM finding #2723 (Cat 4 + Cat 5): (a) bounded dask+numpy path (_process_dask -> da.map_overlap with meta=np.array(())) declared output dtype float64 while the chunk fn returns float32 and numpy/cupy/dask+cupy + the unbounded KDTree path all declare float32; docstrings show dtype=float32. Fix: meta=np.array((), dtype=np.float32). (b) dask backends leaked an internal dask op name (_trim-, _kdtree_chunk_fn-, asarray-) into result.name while numpy/cupy return None. Fix: assign result.name=None after construction in all 3 public funcs (xarray ignores a name=None kwarg for named dask arrays, so the reset must happen post-construction). Same .name-leak class as zonal #2611. PR #2728 off child branch deep-sweep-metadata-proximity-2026-05-29-01. New parametrized regression test test_output_metadata_consistent_across_backends asserts declared dtype float32 + name None across all 4 backends x 3 funcs x bounded/unbounded; full test_proximity.py suite 93 passed. No other CRITICAL/HIGH/MEDIUM/LOW findings." rasterize,2026-06-09,3087,MEDIUM,1,GeoDataFrame .crs dropped on no-like path (Cat 1); fixed via #3087 emitting attrs crs/crs_wkt when output has no CRS. like-path attrs/coords/dims/nodata verified live on all 4 backends (CUDA available); Cats 2-5 clean.