diff --git a/.claude/sweep-accuracy-state.csv b/.claude/sweep-accuracy-state.csv index ed598e89a..105e9084d 100644 --- a/.claude/sweep-accuracy-state.csv +++ b/.claude/sweep-accuracy-state.csv @@ -6,7 +6,7 @@ contour,2026-05-01,,,,"Marching squares correct: NaN check uses self-inequality, corridor,2026-05-01,,LOW,1,"LOW: corridor inherits float32 from cost_distance; for very large accumulated costs, normalized = corridor - corridor_min loses precision near min (intrinsic to upstream dtype, not corridor itself). NaN handling correct (skipna min, np.isfinite check before normalize). All 4 backends route through pure xarray arithmetic; threshold uses dask/cupy/numpy where with try/except dispatch. No CRIT/HIGH issues." cost_distance,2026-06-16,3369,CRITICAL,5,"CRITICAL heap overflow (#3369/this PR): numba Dijkstra kernels _cost_distance_kernel + _cost_distance_tile_kernel sized the binary min-heap at height*width, but a lazy-deletion heap pushes a pixel on every improving relaxation, so push count exceeds h*w on non-uniform friction. _heap_push then writes OOB -> heap corruption, SIGABRT (exit 134, 'corrupted size vs prev_size') on iterative dask path; UB on numpy path. Reference heapq Dijkstra hits 44 pushes on a 6x6=36 grid. Fix: max_heap = h*w*(n_neighbors+1), tile kernel adds +2*(w+h)+4 for phase-2 boundary seeds. Verified: cupy relax kernel (parallel Bellman-Ford) does NOT use this heap, GPU path unaffected. CUDA available; numpy/cupy/dask+numpy/dask+cupy all agree post-fix over 30+40 random adversarial grids; 88 module tests pass (4 new regression tests). Cats 1-4 clean: dist float64 / out float32 fine; inf/nan/zero friction all impassable (tested); bounds guards use >=h/>=w; planar algorithm, no curvature expected. Supersedes prior #1191 (cupy max_iterations h+w->h*w, fixed PR #1192)." curvature,2026-03-30T15:00:00Z,,,,Formula matches ArcGIS reference. Backends consistent. No issues found. -dasymetric,2026-04-14T12:00:00Z,,,,Mass conservation correct. Weighted/binary/limiting_variable all verified. Pycnophylactic Tobler algorithm correct. +dasymetric,2026-06-20,3403,MEDIUM,2;5,"Cat2/Cat5: disaggregate(limiting_variable) silently dropped a zone's whole value when no pixel could absorb it (all pixels in a cap-0 class, e.g. all zero-weight) - returned finite zeros summing to 0 instead of NaN, violating the documented conservation property; weighted method returns NaN for the same input. Fix #3403/this PR: set zone result to NaN when n_overflow==0, matching weighted. Also flagged (LOW, not fixed, documented only): non-finite +Inf weights are not sanitized like negatives are - an Inf weight poisons the whole zone (wsum=inf -> NaN/0, mass lost), consistent across numpy/cupy/dask backends (not a divergence). Cat1/Cat3/Cat4 clean: weighted division is by sum of positives (no cancellation), no neighborhood stencil off-by-one (pycnophylactic shifts are symmetric and bounds-guarded), no geodesic/curvature math. CUDA available; cupy + dask+cupy parity verified (62 tests pass incl. cross-backend). Backends: cupy + dask paths delegate to the numpy core so the fix covers all four." diffusion,2026-05-01,,LOW,1;2;5,"LOW: no Kahan summation across long iterations (drift over 100k steps, standard for explicit Euler); lap=n+s+w+e-4*val has catastrophic cancellation for nearly-uniform large values; res=0 in attrs causes div-by-zero (no guard); dask+cupy boundary='nan' relies on dask accepting cp.nan as fill. CPU/GPU NaN handling consistent (np.isnan vs val!=val). depth=1 matches stencil radius. Memory guards, CFL check, step cap all in place. No CRIT/HIGH." edge_detection,2026-05-01,,,,Thin wrappers around convolve_2d with fixed Sobel/Prewitt/Laplacian kernels; no issues found emerging_hotspots,2026-04-30,,MEDIUM,2;3,MEDIUM: threshold_90 uses int() (truncation) instead of ceil() so n_times=11 requires only 9/11 (81.8%) instead of 90%. MEDIUM: NaN time steps produce gi_bin=0 which classifier counts as 'non-significant' rather than missing; threshold_90 uses full n_times not valid count. LOW: 'global_std == 0' check does not catch NaN std for fully/mostly NaN inputs. diff --git a/xrspatial/dasymetric.py b/xrspatial/dasymetric.py index 70045c7d6..39ad7aa18 100644 --- a/xrspatial/dasymetric.py +++ b/xrspatial/dasymetric.py @@ -391,6 +391,12 @@ def _disaggregate_limiting_numpy(zones, weight, values_dict, nodata_zone, n_overflow = int(overflow_mask.sum()) if n_overflow > 0: result[overflow_mask] += remaining / n_overflow + else: + # No pixel can absorb the leftover (e.g. every pixel in the + # zone is uninhabitable under the caps). Match the weighted + # method: a value with nowhere to go becomes NaN rather than + # silently vanishing into zeros. + result[zmask] = np.nan return result diff --git a/xrspatial/tests/test_dasymetric.py b/xrspatial/tests/test_dasymetric.py index a228ffdc3..96a41dc2b 100644 --- a/xrspatial/tests/test_dasymetric.py +++ b/xrspatial/tests/test_dasymetric.py @@ -258,6 +258,29 @@ def test_custom_density_caps(self): # conservation assert np.nansum(data) == pytest.approx(100.0) + def test_uninhabitable_zone_is_nan_not_zero(self): + """A zone with no habitable pixel must not silently lose its value. + + When every pixel falls in a cap-0 class (e.g. all zero-weight), + the value has nowhere to go. The result must be NaN (matching the + weighted method), not finite zeros that drop the value (#3403). + """ + zones_data = np.array([[1, 1], [1, 1]], dtype=np.float64) + weight_data = np.zeros((2, 2), dtype=np.float64) + zones = create_test_raster(zones_data, backend='numpy') + weight = create_test_raster(weight_data, backend='numpy') + + result = disaggregate(zones, {1: 500.0}, weight, + method='limiting_variable') + data = result.values + assert np.all(np.isnan(data)) + # the value is not silently zeroed away + assert np.nansum(data) == pytest.approx(0.0) + + # weighted method agrees: same input yields all-NaN + weighted = disaggregate(zones, {1: 500.0}, weight, method='weighted') + assert np.all(np.isnan(weighted.values)) + @pytest.mark.skipif(not has_dask_array(), reason="dask not available") def test_raises_for_dask(self, simple_zones_data, simple_weight_data, simple_values):