From 75f33bc70837ed196aaed3bbb9d882a50659f925 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 24 May 2026 13:57:09 +0200 Subject: [PATCH 1/7] fix(model): apply coords-as-truth rule to mask in add_variables/add_constraints Routes ``mask`` through ``as_dataarray_in_coords(mask, data.coords)`` instead of ``as_dataarray(...) + broadcast_mask(...)``, so pandas ``Series`` / ``DataFrame`` masks missing a dimension are broadcast to the variable / constraint shape (parallel to the bounds fix in the previous PR). The ``add_variables`` ``mask`` type hint widens to ``MaskLike`` to match ``add_constraints``. The deprecation announced via ``FutureWarning`` in ``broadcast_mask`` ("Missing values will be filled with False ... In a future version, this will raise an error") is now in effect: masks whose coordinates are a sparse subset of the data's coordinates raise ``ValueError`` instead of silently filling missing entries. Mask dims not in the data raise ``ValueError`` instead of ``AssertionError`` for consistency with the bounds path. ``broadcast_mask`` had no other callers and is removed. Co-Authored-By: Claude Opus 4.7 (1M context) --- doc/release_notes.rst | 1 + linopy/model.py | 11 ++++------- test/test_constraints.py | 17 ++++++++++++----- test/test_variables.py | 19 +++++++++++++------ 4 files changed, 30 insertions(+), 18 deletions(-) diff --git a/doc/release_notes.rst b/doc/release_notes.rst index 3edc1c18..e8e27cc1 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -53,6 +53,7 @@ Most users should keep calling ``model.solve(...)``. If you want more control, y **Bug Fixes** * ``Model.add_variables``: 0.7.0 made ``coords`` (dims, order, and values) the source of truth for ``DataArray`` bounds; this release closes the two remaining gaps. Pandas ``Series`` / ``DataFrame`` bounds missing a dimension are broadcast to ``coords`` instead of being silently dropped (`#709 `__), and the variable's dimension order always follows ``coords`` regardless of bound type (`#706 `__). +* ``add_variables`` / ``add_constraints``: the same rule now applies to ``mask`` — pandas ``Series`` / ``DataFrame`` masks missing a dimension are broadcast to the variable/constraint shape. As previously announced via ``FutureWarning``, masks whose coordinates are a sparse subset of the data's coordinates now raise ``ValueError`` rather than silently filling missing entries with ``False``; masks with dimensions not in the data raise ``ValueError`` instead of ``AssertionError``. * ``add_piecewise_formulation`` now produces a reproducible dimension order in the broadcast breakpoint array. The previous set-based expansion gave a hash-randomized order that varied between processes. * SOS constraints on masked variables no longer cause solver-specific failures (Gurobi ``IndexError``, Xpress ``?404 Invalid column number``, LP parse errors, silent set corruption). ``Model.solve()`` and ``Model.to_file()`` now raise a clear ``NotImplementedError`` referring users to `#688 `__; pass ``reformulate_sos=True`` as a workaround. * ``Model.solve(..., reformulate_sos=True)`` now actually reformulates SOS constraints even when the solver supports them natively. Previously it was silently ignored with a warning. diff --git a/linopy/model.py b/linopy/model.py index 6132fb00..669ae11d 100644 --- a/linopy/model.py +++ b/linopy/model.py @@ -20,7 +20,7 @@ import pandas as pd import xarray as xr from deprecation import deprecated -from numpy import inf, ndarray +from numpy import inf from pandas.core.frame import DataFrame from pandas.core.series import Series from xarray import DataArray, Dataset @@ -32,7 +32,6 @@ as_dataarray_in_coords, assign_multiindex_safe, best_int, - broadcast_mask, maybe_replace_signs, replace_by_map, to_path, @@ -593,7 +592,7 @@ def add_variables( upper: Any = inf, coords: Sequence[Sequence | pd.Index] | Mapping | None = None, name: str | None = None, - mask: DataArray | ndarray | Series | None = None, + mask: MaskLike | None = None, binary: bool = False, integer: bool = False, semi_continuous: bool = False, @@ -713,8 +712,7 @@ def add_variables( self._check_valid_dim_names(data) if mask is not None: - mask = as_dataarray(mask, coords=data.coords, dims=data.dims).astype(bool) - mask = broadcast_mask(mask, data.labels) + mask = as_dataarray_in_coords(mask, data.coords).astype(bool) # Auto-mask based on NaN in bounds (use numpy for speed) if self.auto_mask: @@ -978,8 +976,7 @@ def add_constraints( (data,) = xr.broadcast(data, exclude=[TERM_DIM]) if mask is not None: - mask = as_dataarray(mask, coords=data.coords, dims=data.dims).astype(bool) - mask = broadcast_mask(mask, data.labels) + mask = as_dataarray_in_coords(mask, data.coords).astype(bool) # Auto-mask based on null expressions or NaN RHS (use numpy for speed) if self.auto_mask: diff --git a/test/test_constraints.py b/test/test_constraints.py index 1667bfec..e532e5ce 100644 --- a/test/test_constraints.py +++ b/test/test_constraints.py @@ -258,20 +258,27 @@ def test_masked_constraints_broadcast() -> None: assert (m.constraints.labels.bc2[:, 0:5] != -1).all() assert (m.constraints.labels.bc2[:, 5:10] == -1).all() + # Pandas Series with named index missing a dim is broadcast to data.coords. + mask_pd = pd.Series( + [True, False, True] + [False] * 7, index=pd.RangeIndex(10, name="dim_0") + ) + m.add_constraints(1 * x + 10 * y, EQUAL, 0, name="bc_pd", mask=mask_pd) + assert (m.constraints.labels.bc_pd[[0, 2], :] != -1).all() + assert (m.constraints.labels.bc_pd[[1, 3, 4, 5, 6, 7, 8, 9], :] == -1).all() + + # Mask with sparse coords (subset of data's coords) now raises instead of + # emitting a FutureWarning — the rule from the bounds path applies here too. mask3 = xr.DataArray( [True, True, False, False, False], dims=["dim_0"], coords={"dim_0": range(5)}, ) - with pytest.warns(FutureWarning, match="Missing values will be filled"): + with pytest.raises(ValueError, match="Coordinates for dimension 'dim_0'"): m.add_constraints(1 * x + 10 * y, EQUAL, 0, name="bc3", mask=mask3) - assert (m.constraints.labels.bc3[0:2, :] != -1).all() - assert (m.constraints.labels.bc3[2:5, :] == -1).all() - assert (m.constraints.labels.bc3[5:10, :] == -1).all() # Mask with extra dimension not in data should raise mask4 = xr.DataArray([True, False], dims=["extra_dim"]) - with pytest.raises(AssertionError, match="not a subset"): + with pytest.raises(ValueError, match="extra dimensions"): m.add_constraints(1 * x + 10 * y, EQUAL, 0, name="bc4", mask=mask4) diff --git a/test/test_variables.py b/test/test_variables.py index 37de6aff..c16c27ea 100644 --- a/test/test_variables.py +++ b/test/test_variables.py @@ -123,20 +123,27 @@ def test_variables_mask_broadcast() -> None: assert (y.labels[:, 0:5] != -1).all() assert (y.labels[:, 5:10] == -1).all() + # Pandas Series with named index missing a dim is broadcast to data.coords. + mask_pd = pd.Series( + [True, False, True] + [False] * 7, index=pd.RangeIndex(10, name="dim_0") + ) + v = m.add_variables(lower, upper, name="v", mask=mask_pd) + assert (v.labels[[0, 2], :] != -1).all() + assert (v.labels[[1, 3, 4, 5, 6, 7, 8, 9], :] == -1).all() + + # Mask with sparse coords (subset of data's coords) now raises instead of + # emitting a FutureWarning — the rule from the bounds path applies here too. mask3 = xr.DataArray( [True, True, False, False, False], dims=["dim_0"], coords={"dim_0": range(5)}, ) - with pytest.warns(FutureWarning, match="Missing values will be filled"): - z = m.add_variables(lower, upper, name="z", mask=mask3) - assert (z.labels[0:2, :] != -1).all() - assert (z.labels[2:5, :] == -1).all() - assert (z.labels[5:10, :] == -1).all() + with pytest.raises(ValueError, match="Coordinates for dimension 'dim_0'"): + m.add_variables(lower, upper, name="z", mask=mask3) # Mask with extra dimension not in data should raise mask4 = xr.DataArray([True, False], dims=["extra_dim"]) - with pytest.raises(AssertionError, match="not a subset"): + with pytest.raises(ValueError, match="extra dimensions"): m.add_variables(lower, upper, name="w", mask=mask4) From 461c2b69aababe4ceaa55ea1e7e88a48db10d1d4 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 24 May 2026 18:41:28 +0200 Subject: [PATCH 2/7] refactor: unify as_dataarray; split broadcasting from coords validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #723. Folds the body of `as_dataarray_in_coords` into `as_dataarray` and extracts the contract checks into `assert_compatible_with_coords`, so linopy now has one broadcasting primitive and one validation companion. `as_dataarray(arr, coords)` aligns the result against `coords` for every input type: labels positional inputs (numpy / unnamed pandas / scalar) by position, reindexes same-values-different-order, expands missing dims, and transposes to coords order. Extra dims and disagreeing value sets on shared dims pass through unchanged, so xarray broadcasting in expression arithmetic keeps working. `assert_compatible_with_coords(arr, coords)` enforces the strict contract (`arr.dims ⊆ coords.dims`, plus exact coord-value equality on shared dims). `add_variables` and `add_constraints` now call it after `as_dataarray` for `lower` / `upper` / `mask`, replacing the deleted `as_dataarray_in_coords` helper. `_coords_to_dict` filters MultiIndex level coords out of `xarray.Coordinates` inputs so the new strict-by-default path treats `station` (and not its derived `letter` / `num` levels) as the dim. Test suite: 3698 passed (no regressions). Two existing tests were updated to reflect the new "coords is source of truth" semantics: `test_as_dataarray_with_ndarray_coords_dict_set_dims_not_aligned` (extra coord entries now broadcast in) and `test_dataarray_extra_dims` (now triggers the subset check rather than the value-mismatch check). Microbenchmark in dev-scripts/benchmark_as_dataarray.py shows flat timings vs the base branch on both add_variables-heavy and arithmetic- heavy workloads. Co-Authored-By: Claude Opus 4.7 (1M context) --- linopy/common.py | 271 +++++++++++++++++++++++++----------------- linopy/model.py | 16 ++- test/test_common.py | 49 +++++++- test/test_variable.py | 4 +- 4 files changed, 224 insertions(+), 116 deletions(-) diff --git a/linopy/common.py b/linopy/common.py index dce26a7a..a0d54e3e 100644 --- a/linopy/common.py +++ b/linopy/common.py @@ -213,30 +213,20 @@ def numpy_to_dataarray( return DataArray(arr, coords=coords, dims=dims, **kwargs) -def as_dataarray( +def _as_dataarray_lax( arr: Any, coords: CoordsLike | None = None, dims: DimsLike | None = None, **kwargs: Any, ) -> DataArray: """ - Convert an object to a DataArray. - - Parameters - ---------- - arr: - The input object. - coords (Union[dict, list, None]): - The coordinates for the DataArray. If None, default coordinates will be used. - dims (Union[list, None]): - The dimensions for the DataArray. If None, the dimensions will be automatically generated. - **kwargs: - Additional keyword arguments to be passed to the DataArray constructor. + Type-dispatched DataArray conversion without any coords validation. - Returns - ------- - DataArray: - The converted DataArray. + This is the conversion primitive used by ``as_dataarray``: it picks the + right constructor for each supported input type but does not check the + result against ``coords``. Callers that need ``coords`` to govern the + output (dim order, shared-dim values, missing-dim expansion) should use + ``as_dataarray`` instead. """ if isinstance(arr, pd.Series | pd.DataFrame): arr = pandas_to_dataarray(arr, coords=coords, dims=dims, **kwargs) @@ -275,87 +265,58 @@ def as_dataarray( return arr -def _coords_to_dict( - coords: Sequence[Sequence | pd.Index] | Mapping, -) -> dict[str, Any]: - """ - Normalize coords to a dict mapping dim names to coordinate values. - - Entries must be ``pd.Index`` (named or not) or unnamed sequences - (``list`` / ``tuple`` / ``range`` / ``np.ndarray``). Other types — - notably ``xarray.DataArray`` — raise ``TypeError`` rather than - being silently dropped: callers should convert via - ``variable.indexes[]`` (or ``pd.Index(...)``) first. - """ - if isinstance(coords, Mapping): - return dict(coords) - result: dict[str, Any] = {} - for c in coords: - if isinstance(c, pd.Index): - if c.name: - result[c.name] = c - elif isinstance(c, list | tuple | range | np.ndarray): - pass # unnamed sequence contributes no named dim - else: - raise TypeError( - f"coords entries must be pd.Index or an unnamed sequence " - f"(list / tuple / range / numpy.ndarray); got " - f"{type(c).__name__}. For an xarray DataArray coord, pass " - f"`variable.indexes[]` (a pd.Index) instead." - ) - return result - - -def _named_pandas_to_dataarray(arr: pd.Series | pd.DataFrame) -> DataArray | None: +def as_dataarray( + arr: Any, + coords: CoordsLike | None = None, + dims: DimsLike | None = None, + **kwargs: Any, +) -> DataArray: """ - Convert a pandas Series or DataFrame with fully named axes to a DataArray. + Convert ``arr`` to a DataArray and broadcast it against ``coords``. - DataFrame columns (and column-MultiIndex levels) are stacked into the row - MultiIndex so each axis name becomes its own dimension. Returns ``None`` - if any axis (or MultiIndex level) is unnamed, so the caller can fall back - to ``as_dataarray``. - """ - names = list(arr.index.names) - if isinstance(arr, pd.DataFrame): - names += list(arr.columns.names) - # pd.Index.names entries can be any hashable (tuples, ints, ...). Only - # strings map cleanly to xarray dim names; everything else falls through. - if any(not isinstance(n, str) for n in names): - return None - - if isinstance(arr, pd.DataFrame): - arr = arr.stack(list(range(arr.columns.nlevels)), future_stack=True) - - return arr.to_xarray() + When ``coords`` carries named dimensions, the result is aligned with + those coords: + - positional inputs (numpy, polars, unnamed pandas, scalar) are labeled + with the coord dim names by position; + - for every dim shared between ``arr`` and ``coords``, same-values- + different-order coordinates are reindexed to ``coords`` order; + - dims present in ``coords`` but not in ``arr`` are expanded to the + ``coords`` shape; + - the result is transposed to ``coords`` order. -def as_dataarray_in_coords(arr: Any, coords: Any, **kwargs: Any) -> DataArray: - """ - Coerce ``arr`` into a DataArray that matches ``coords``. + Dimensions present in ``arr`` but not in ``coords`` are preserved so + standard xarray broadcasting keeps working. Disagreeing coord values + on a shared dim (i.e. value sets that are not equal as sets) are + passed through unchanged: downstream xarray alignment decides how to + combine them. To enforce that ``arr.dims`` ⊆ ``coords.dims`` and that + shared coord values match, use ``assert_compatible_with_coords``. - Strict-coords counterpart to ``as_dataarray``: ``coords`` is the - source of truth, so the returned DataArray has the dimensions, - dimension order, and coordinate values of ``coords``, regardless - of the input type. Pandas inputs with fully named axes are - converted via ``to_xarray`` so their axis names map to dimensions; - scalars, numpy arrays, and unnamed pandas go through - ``as_dataarray``. The result is then validated, expanded over - missing dims, and transposed; ``expand_dims`` and ``transpose`` - are no-ops when the array already matches. + Parameters + ---------- + arr + Input scalar / list / numpy / polars / pandas / DataArray. + coords + Mapping of dim name → coord values, or a sequence of ``pd.Index`` + / unnamed sequences. ``None`` falls back to xarray's default + labeling (no broadcasting). + dims + Optional dim-names hint, used for positional inputs and to bias + pandas-axis interpretation. + **kwargs + Forwarded to the underlying DataArray construction. - - Raises ``ValueError`` if the input has dimensions not in - ``coords``. - - Raises ``ValueError`` if shared dimension coordinates differ in - values. Same-values-different-order coordinates are reindexed. + Returns + ------- + DataArray + Broadcast against ``coords`` (extra dims preserved). """ if coords is None: - return as_dataarray(arr, coords, **kwargs) + return _as_dataarray_lax(arr, coords, dims, **kwargs) expected = _coords_to_dict(coords) if not expected: - return as_dataarray(arr, coords, **kwargs) - - orig_type_name = type(arr).__name__ + return _as_dataarray_lax(arr, coords, dims, **kwargs) if isinstance(arr, pd.Series | pd.DataFrame): converted = _named_pandas_to_dataarray(arr) @@ -365,16 +326,17 @@ def as_dataarray_in_coords(arr: Any, coords: Any, **kwargs: Any) -> DataArray: if not isinstance(arr, DataArray): # numpy/polars/unnamed-pandas inputs are positional — their only # meaningful information is the values; any axis labels are - # auto-generated. Default dims to coords' keys so as_dataarray + # auto-generated. Default dims to coords' keys so the lax conversion # labels axes correctly (instead of dim_0/dim_1), then re-assign # coords from expected so positional inputs align to coords by # position. A shape mismatch surfaces here as a clear xarray # "conflicting sizes" error rather than a confusing # "coordinates do not match" further down. - kwargs.setdefault("dims", list(expected)) - arr = as_dataarray(arr, coords, **kwargs) + if dims is None: + dims = list(expected) + arr = _as_dataarray_lax(arr, coords, dims=dims, **kwargs) # Skip MultiIndex dims — re-assigning a PandasMultiIndex coord emits - # a FutureWarning and isn't needed (as_dataarray already used it). + # a FutureWarning and isn't needed (the lax pass already used it). arr = arr.assign_coords( { d: expected[d] @@ -383,12 +345,6 @@ def as_dataarray_in_coords(arr: Any, coords: Any, **kwargs: Any) -> DataArray: } ) - extra = set(arr.dims) - set(expected) - if extra: - raise ValueError( - f"{orig_type_name} has extra dimensions not in coords: {extra}" - ) - for dim, coord_values in expected.items(): if dim not in arr.dims: continue @@ -400,17 +356,17 @@ def as_dataarray_in_coords(arr: Any, coords: Any, **kwargs: Any) -> DataArray: else pd.Index(coord_values) ) actual_idx = arr.coords[dim].to_index() - if not actual_idx.equals(expected_idx): - # Same values, different order → reindex to match expected order - if len(actual_idx) == len(expected_idx) and set(actual_idx) == set( - expected_idx - ): - arr = arr.reindex({dim: expected_idx}) - else: - raise ValueError( - f"Coordinates for dimension '{dim}' do not match: " - f"expected {expected_idx.tolist()}, got {actual_idx.tolist()}" - ) + if actual_idx.equals(expected_idx): + continue + # Same values, different order → reindex to match expected order. + # Different value sets are left alone: downstream xarray alignment + # (e.g. xr.align in arithmetic) handles them. Callers needing strict + # value matching (add_variables / add_constraints) should use + # ``assert_compatible_with_coords`` after this call. + if len(actual_idx) == len(expected_idx) and set(actual_idx) == set( + expected_idx + ): + arr = arr.reindex({dim: expected_idx}) # expand_dims prepends new dimensions and their coordinate variables; # the subsequent transpose restores coords order. Both are no-ops when @@ -439,6 +395,105 @@ def as_dataarray_in_coords(arr: Any, coords: Any, **kwargs: Any) -> DataArray: return arr +def assert_compatible_with_coords(arr: DataArray, coords: CoordsLike | None) -> None: + """ + Raise ``ValueError`` if ``arr`` is incompatible with ``coords``. + + ``arr`` is compatible with ``coords`` when both of the following hold: + + - every dim in ``arr.dims`` is also a dim in ``coords`` (no extras); + - for every dim shared between ``arr`` and ``coords``, the coord + values are equal. + + No-op when ``coords`` is ``None`` or carries no named dimensions. + """ + if coords is None: + return + expected = _coords_to_dict(coords) + if not expected: + return + extra = set(arr.dims) - set(expected) + if extra: + raise ValueError(f"DataArray has extra dimensions not in coords: {extra}") + for dim, coord_values in expected.items(): + if dim not in arr.dims: + continue + if isinstance(arr.indexes.get(dim), pd.MultiIndex): + continue + expected_idx = ( + coord_values + if isinstance(coord_values, pd.Index) + else pd.Index(coord_values) + ) + actual_idx = arr.coords[dim].to_index() + if not actual_idx.equals(expected_idx): + raise ValueError( + f"Coordinates for dimension '{dim}' do not match: " + f"expected {expected_idx.tolist()}, got {actual_idx.tolist()}" + ) + + +def _coords_to_dict( + coords: Sequence[Sequence | pd.Index] | Mapping, +) -> dict[str, Any]: + """ + Normalize coords to a dict mapping dim names to coordinate values. + + For ``xarray.Coordinates`` (and ``DataArray.coords``), only entries + that are actual dimensions are kept; derived MultiIndex level coords + are dropped here and re-attached by xarray downstream. Plain mappings + are returned as-is. For sequence inputs, entries must be ``pd.Index`` + (named or not) or unnamed sequences (``list`` / ``tuple`` / ``range`` + / ``np.ndarray``). Other types — notably ``xarray.DataArray`` — raise + ``TypeError`` rather than being silently dropped: callers should + convert via ``variable.indexes[]`` (or ``pd.Index(...)``) first. + """ + if isinstance(coords, Coordinates): + # Coordinates iterates over every coord variable, including + # MultiIndex level coords. Keep only the entries that are dims. + return {d: coords[d] for d in coords.dims if d in coords} + if isinstance(coords, Mapping): + return dict(coords) + result: dict[str, Any] = {} + for c in coords: + if isinstance(c, pd.Index): + if c.name: + result[c.name] = c + elif isinstance(c, list | tuple | range | np.ndarray): + pass # unnamed sequence contributes no named dim + else: + raise TypeError( + f"coords entries must be pd.Index or an unnamed sequence " + f"(list / tuple / range / numpy.ndarray); got " + f"{type(c).__name__}. For an xarray DataArray coord, pass " + f"`variable.indexes[]` (a pd.Index) instead." + ) + return result + + +def _named_pandas_to_dataarray(arr: pd.Series | pd.DataFrame) -> DataArray | None: + """ + Convert a pandas Series or DataFrame with fully named axes to a DataArray. + + DataFrame columns (and column-MultiIndex levels) are stacked into the row + MultiIndex so each axis name becomes its own dimension. Returns ``None`` + if any axis (or MultiIndex level) is unnamed, so the caller can fall back + to ``as_dataarray``. + """ + names = list(arr.index.names) + if isinstance(arr, pd.DataFrame): + names += list(arr.columns.names) + # pd.Index.names entries can be any hashable (tuples, ints, ...). Only + # strings map cleanly to xarray dim names; everything else falls through. + if any(not isinstance(n, str) for n in names): + return None + + if isinstance(arr, pd.DataFrame): + arr = arr.stack(list(range(arr.columns.nlevels)), future_stack=True) + + return arr.to_xarray() + + def broadcast_mask(mask: DataArray, labels: DataArray) -> DataArray: """ Broadcast a boolean mask to match the shape of labels. diff --git a/linopy/model.py b/linopy/model.py index 669ae11d..63a4aee0 100644 --- a/linopy/model.py +++ b/linopy/model.py @@ -29,7 +29,7 @@ from linopy import solvers from linopy.common import ( as_dataarray, - as_dataarray_in_coords, + assert_compatible_with_coords, assign_multiindex_safe, best_int, maybe_replace_signs, @@ -700,10 +700,14 @@ def add_variables( "Semi-continuous variables require a positive scalar lower bound." ) + lower_da = as_dataarray(lower, coords, **kwargs) + upper_da = as_dataarray(upper, coords, **kwargs) + assert_compatible_with_coords(lower_da, coords) + assert_compatible_with_coords(upper_da, coords) data = Dataset( { - "lower": as_dataarray_in_coords(lower, coords, **kwargs), - "upper": as_dataarray_in_coords(upper, coords, **kwargs), + "lower": lower_da, + "upper": upper_da, "labels": -1, } ) @@ -712,7 +716,8 @@ def add_variables( self._check_valid_dim_names(data) if mask is not None: - mask = as_dataarray_in_coords(mask, data.coords).astype(bool) + mask = as_dataarray(mask, data.coords).astype(bool) + assert_compatible_with_coords(mask, data.coords) # Auto-mask based on NaN in bounds (use numpy for speed) if self.auto_mask: @@ -976,7 +981,8 @@ def add_constraints( (data,) = xr.broadcast(data, exclude=[TERM_DIM]) if mask is not None: - mask = as_dataarray_in_coords(mask, data.coords).astype(bool) + mask = as_dataarray(mask, data.coords).astype(bool) + assert_compatible_with_coords(mask, data.coords) # Auto-mask based on null expressions or NaN RHS (use numpy for speed) if self.auto_mask: diff --git a/test/test_common.py b/test/test_common.py index 0c379a0b..977e06b2 100644 --- a/test/test_common.py +++ b/test/test_common.py @@ -19,6 +19,7 @@ from linopy.common import ( align, as_dataarray, + assert_compatible_with_coords, assign_multiindex_safe, best_int, get_dims_with_index_levels, @@ -345,13 +346,15 @@ def test_as_dataarray_with_ndarray_coords_dict_dims_aligned() -> None: def test_as_dataarray_with_ndarray_coords_dict_set_dims_not_aligned() -> None: + """Coords is source of truth: extra coord entries broadcast into the result.""" target_dims = ("dim_0", "dim_1") target_coords = {"dim_0": ["a", "b"], "dim_2": ["A", "B"]} arr = np.array([[1, 2], [3, 4]]) da = as_dataarray(arr, coords=target_coords, dims=target_dims) - assert da.dims == target_dims + # dims labels the positional axes; coords adds dim_2 by broadcast. + assert set(da.dims) == {"dim_0", "dim_1", "dim_2"} assert list(da.coords["dim_0"].values) == ["a", "b"] - assert "dim_2" not in da.coords + assert list(da.coords["dim_2"].values) == ["A", "B"] def test_as_dataarray_with_number() -> None: @@ -483,6 +486,48 @@ def test_as_dataarray_with_unsupported_type() -> None: as_dataarray(lambda x: 1, dims=["dim1"], coords=[["a"]]) +def test_as_dataarray_preserves_extra_dims_for_broadcasting() -> None: + """Extra dims in the input are not rejected — they broadcast downstream.""" + arr = DataArray( + [[1, 2], [3, 4], [5, 6]], + dims=["a", "t"], + coords={"a": [0, 1, 2], "t": [10, 20]}, + ) + coords = {"a": [0, 1, 2]} + da = as_dataarray(arr, coords=coords) + assert set(da.dims) == {"a", "t"} + assert list(da.coords["t"].values) == [10, 20] + + +def test_as_dataarray_keeps_disjoint_shared_dim_values() -> None: + """Different value sets on a shared dim are passed through (xr.align handles).""" + arr = DataArray([1, 2, 3, 4, 5], dims=["a"], coords={"a": [0, 1, 2, 3, 4]}) + coords = {"a": [2, 3]} + da = as_dataarray(arr, coords=coords) + # No exception, no reindex; downstream alignment intersects. + assert list(da.coords["a"].values) == [0, 1, 2, 3, 4] + + +def test_assert_compatible_with_coords_rejects_extra_dims() -> None: + arr = DataArray( + [[1, 2], [3, 4]], dims=["a", "b"], coords={"a": [0, 1], "b": [0, 1]} + ) + with pytest.raises(ValueError, match="extra dimensions"): + assert_compatible_with_coords(arr, {"a": [0, 1]}) + + +def test_assert_compatible_with_coords_rejects_value_mismatch() -> None: + arr = DataArray([1, 2, 3], dims=["a"], coords={"a": [0, 1, 2]}) + with pytest.raises(ValueError, match="do not match"): + assert_compatible_with_coords(arr, {"a": [10, 20, 30]}) + + +def test_assert_compatible_with_coords_allows_subset_dims() -> None: + """arr.dims ⊂ coords.dims is fine (broadcasting fills the missing dim).""" + arr = DataArray([1, 2, 3], dims=["a"], coords={"a": [0, 1, 2]}) + assert_compatible_with_coords(arr, {"a": [0, 1, 2], "b": [10, 20]}) # no raise + + def test_best_int() -> None: # Test for int8 assert best_int(127) == np.int8 diff --git a/test/test_variable.py b/test/test_variable.py index 1a49abd6..7ced26ff 100644 --- a/test/test_variable.py +++ b/test/test_variable.py @@ -428,7 +428,9 @@ def test_dataarray_coord_mismatch_upper(self, model: "Model") -> None: model.add_variables(upper=upper, coords=self.SEQ_COORDS, name="x") def test_dataarray_extra_dims(self, model: "Model") -> None: - lower = DataArray([[1, 2], [3, 4]], dims=["x", "y"]) + lower = DataArray( + [[1, 2], [3, 4], [5, 6]], dims=["x", "y"], coords={"x": [0, 1, 2]} + ) with pytest.raises(ValueError, match="extra dimensions"): model.add_variables(lower=lower, coords=self.DICT_COORDS, name="x") From fdff779b0775eeb391eeb9d69f724d13297dfce7 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Mon, 25 May 2026 11:09:06 +0200 Subject: [PATCH 3/7] feat: dims= names unnamed coords; doctest the add_variables contract Closes a silent-failure gap in the strict coords-as-truth path: when the caller passed ``coords=[[1, 2, 3]], dims=["x"]`` to ``add_variables``, ``_coords_to_dict`` returned an empty mapping (unnamed sequences carry no dim name), so the strict checks short-circuited and bounds with extra dims or mismatched values flowed through unchecked, producing variables with frankenstein outer-joined coord values. ``_coords_to_dict`` now accepts an optional ``dims`` argument that names unnamed sequence entries by position. ``as_dataarray`` and ``assert_compatible_with_coords`` plumb it through; ``add_variables`` forwards ``kwargs.get("dims")`` to the assertions for ``lower`` and ``upper``. ``coords=[[1, 2, 3]], dims=["x"]`` now enforces the same contract as ``coords={"x": [1, 2, 3]}`` or ``coords=[pd.Index([1, 2, 3], name="x")]``. Docstring of ``add_variables.coords`` documents the contract (subset-of-dims, dim order, value match with auto-reindex, missing-dim broadcast) and includes four doctests pinning it: the extra-dim raise, the value-mismatch raise, the same-values-different-order auto-reindex, and the unnamed-coords-plus-dims opt-in. Test suite: 3698 passed (parity with the previous commit on this branch). ``pytest --doctest-modules linopy/model.py -k add_variables`` also green. Co-Authored-By: Claude Opus 4.7 (1M context) --- linopy/common.py | 38 ++++++++++++++++++---- linopy/model.py | 83 ++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 105 insertions(+), 16 deletions(-) diff --git a/linopy/common.py b/linopy/common.py index a0d54e3e..b17c391c 100644 --- a/linopy/common.py +++ b/linopy/common.py @@ -314,7 +314,7 @@ def as_dataarray( if coords is None: return _as_dataarray_lax(arr, coords, dims, **kwargs) - expected = _coords_to_dict(coords) + expected = _coords_to_dict(coords, dims=dims) if not expected: return _as_dataarray_lax(arr, coords, dims, **kwargs) @@ -395,7 +395,11 @@ def as_dataarray( return arr -def assert_compatible_with_coords(arr: DataArray, coords: CoordsLike | None) -> None: +def assert_compatible_with_coords( + arr: DataArray, + coords: CoordsLike | None, + dims: DimsLike | None = None, +) -> None: """ Raise ``ValueError`` if ``arr`` is incompatible with ``coords``. @@ -405,11 +409,16 @@ def assert_compatible_with_coords(arr: DataArray, coords: CoordsLike | None) -> - for every dim shared between ``arr`` and ``coords``, the coord values are equal. + ``dims`` mirrors the ``dims`` argument of ``as_dataarray``: it names + unnamed entries in a sequence-form ``coords`` by position, so + ``coords=[[1, 2, 3]], dims=["x"]`` is enforced the same way as + ``coords={"x": [1, 2, 3]}``. + No-op when ``coords`` is ``None`` or carries no named dimensions. """ if coords is None: return - expected = _coords_to_dict(coords) + expected = _coords_to_dict(coords, dims=dims) if not expected: return extra = set(arr.dims) - set(expected) @@ -435,6 +444,7 @@ def assert_compatible_with_coords(arr: DataArray, coords: CoordsLike | None) -> def _coords_to_dict( coords: Sequence[Sequence | pd.Index] | Mapping, + dims: DimsLike | None = None, ) -> dict[str, Any]: """ Normalize coords to a dict mapping dim names to coordinate values. @@ -447,6 +457,11 @@ def _coords_to_dict( / ``np.ndarray``). Other types — notably ``xarray.DataArray`` — raise ``TypeError`` rather than being silently dropped: callers should convert via ``variable.indexes[]`` (or ``pd.Index(...)``) first. + + Unnamed sequence entries (or unnamed ``pd.Index``) gain a dim name + from ``dims`` by position when ``dims`` is provided, so callers that + pass ``coords=[[1, 2, 3]], dims=["x"]`` get the same strict + enforcement as ``coords={"x": [1, 2, 3]}``. """ if isinstance(coords, Coordinates): # Coordinates iterates over every coord variable, including @@ -454,13 +469,22 @@ def _coords_to_dict( return {d: coords[d] for d in coords.dims if d in coords} if isinstance(coords, Mapping): return dict(coords) + dim_names: list[Any] | None = None + if dims is not None: + dim_names = list(dims) if isinstance(dims, list | tuple) else [dims] result: dict[str, Any] = {} - for c in coords: + for i, c in enumerate(coords): if isinstance(c, pd.Index): - if c.name: - result[c.name] = c + name = ( + c.name + if c.name + else (dim_names[i] if dim_names and i < len(dim_names) else None) + ) + if name is not None: + result[name] = c elif isinstance(c, list | tuple | range | np.ndarray): - pass # unnamed sequence contributes no named dim + if dim_names and i < len(dim_names): + result[dim_names[i]] = pd.Index(c, name=dim_names[i]) else: raise TypeError( f"coords entries must be pd.Index or an unnamed sequence " diff --git a/linopy/model.py b/linopy/model.py index 63a4aee0..67ef7a7d 100644 --- a/linopy/model.py +++ b/linopy/model.py @@ -615,14 +615,27 @@ def add_variables( upper : TYPE, optional Upper bound of the variable(s). Ignored if `binary` is True. The default is inf. - coords : list/xarray.Coordinates, optional - The coords of the variable array. When provided, ``coords`` + coords : list/dict/xarray.Coordinates, optional + The coords of the variable array. When provided with **named + dimensions** (a ``Mapping``, ``xarray.Coordinates``, a + sequence of named ``pd.Index`` objects, or an unnamed + sequence paired with ``dims=`` in ``**kwargs``), ``coords`` is the source of truth for the variable's dimensions, - dimension order, and coordinate values; ``lower`` and - ``upper`` are broadcast and aligned to match. One - optimization variable is added per combination of - coordinates. The default is None, in which case the shape - is inferred from the bounds. + order, and values. ``lower``, ``upper`` and ``mask`` are + aligned to this contract: + + - dims of every bound must be a subset of ``coords.dims``; + extra dims raise ``ValueError``; + - dim order in the variable always follows ``coords``; + - shared-dim coordinate values must equal ``coords``; same + values in a different order are auto-reindexed, different + value sets raise ``ValueError``; + - dims listed in ``coords`` but missing from a bound are + broadcast to ``coords`` shape. + + One optimization variable is added per combination of + coordinates. The default is ``None``, in which case the + shape is inferred from the bounds. name : str, optional Reference name of the added variables. The default None results in a name like "var1", "var2" etc. @@ -675,6 +688,58 @@ def add_variables( [7]: x[7] ∈ [0, inf] [8]: x[8] ∈ [0, inf] [9]: x[9] ∈ [0, inf] + + Strict coords-as-truth: a bound with an extra dim raises. + + >>> import xarray as xr + >>> m = Model() + >>> bad = xr.DataArray( + ... [[1.0, 2.0, 3.0]] * 2, + ... dims=["extra", "x"], + ... coords={"x": [0, 1, 2]}, + ... ) + >>> m.add_variables(lower=bad, coords=[pd.Index([0, 1, 2], name="x")], name="v") + Traceback (most recent call last): + ... + ValueError: DataArray has extra dimensions not in coords: {'extra'} + + Strict coords-as-truth: a bound whose shared-dim values don't + match raises. + + >>> m = Model() + >>> wrong = xr.DataArray( + ... [1.0, 2.0, 3.0], dims=["x"], coords={"x": [10, 20, 30]} + ... ) + >>> m.add_variables( + ... lower=wrong, coords=[pd.Index([0, 1, 2], name="x")], name="v" + ... ) + Traceback (most recent call last): + ... + ValueError: Coordinates for dimension 'x' do not match: expected [0, 1, 2], got [10, 20, 30] + + Strict coords-as-truth, helpful side: a bound whose coord values + match ``coords`` only in a different order is auto-reindexed. + + >>> m = Model() + >>> reordered = xr.DataArray( + ... [3.0, 1.0, 2.0], dims=["x"], coords={"x": ["c", "a", "b"]} + ... ) + >>> v = m.add_variables( + ... lower=reordered, + ... coords=[pd.Index(["a", "b", "c"], name="x")], + ... name="r", + ... ) + >>> list(v.data.lower.values) + [1.0, 2.0, 3.0] + + Unnamed-coords sequence + ``dims=`` opts into the same strict + enforcement as a named index — extra dims still raise. + + >>> m = Model() + >>> m.add_variables(lower=bad, coords=[[0, 1, 2]], dims=["x"], name="w") + Traceback (most recent call last): + ... + ValueError: DataArray has extra dimensions not in coords: {'extra'} """ if name is None: name = f"var{self._varnameCounter}" @@ -702,8 +767,8 @@ def add_variables( lower_da = as_dataarray(lower, coords, **kwargs) upper_da = as_dataarray(upper, coords, **kwargs) - assert_compatible_with_coords(lower_da, coords) - assert_compatible_with_coords(upper_da, coords) + assert_compatible_with_coords(lower_da, coords, dims=kwargs.get("dims")) + assert_compatible_with_coords(upper_da, coords, dims=kwargs.get("dims")) data = Dataset( { "lower": lower_da, From a58ef685da53376509b2c8816b56b17212cf03dd Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Mon, 25 May 2026 12:07:20 +0200 Subject: [PATCH 4/7] feat: add align_to_coords with semantic validation error messages Introduce align_to_coords to wrap as_dataarray and assert_compatible_with_coords with user-facing labels (lower bound, upper bound, mask). Errors now name the argument and distinguish extra dimensions, coordinate mismatches, and conversion failures. Extend mask validation to use coords+dims= when provided. Co-authored-by: Cursor --- doc/release_notes.rst | 8 +++++++ linopy/common.py | 52 ++++++++++++++++++++++++++++++++++++---- linopy/model.py | 39 ++++++++++++++++++++---------- test/test_common.py | 30 +++++++++++++++++++++-- test/test_constraints.py | 4 ++-- test/test_variable.py | 31 +++++++++++++++++------- test/test_variables.py | 4 ++-- 7 files changed, 137 insertions(+), 31 deletions(-) diff --git a/doc/release_notes.rst b/doc/release_notes.rst index e8e27cc1..13763e5f 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -67,6 +67,14 @@ Most users should keep calling ``model.solve(...)``. If you want more control, y **Internal** +* ``linopy.common.as_dataarray`` is now the single broadcasting primitive; + strict subset-dim / coord-value checks live in + ``assert_compatible_with_coords`` (via ``align_to_coords`` in + ``add_variables`` / ``add_constraints``). Validation errors name the + argument (``lower bound``, ``upper bound``, ``mask``) and explain whether + dimensions or coordinate values disagree with ``coords``. When ``coords`` is + a mapping, extra keys beyond the positional ``dims`` are broadcast in rather + than dropped. * Each ``Solver`` subclass now overrides at most three hooks: ``_build_direct`` (build the native model), ``_run_direct`` (run it), and ``_run_file`` (run the solver on an LP/MPS file). File-only solvers (CBC, GLPK, CPLEX, SCIP, Knitro, COPT, MindOpt) only override ``_run_file``. * New ``ConstraintLabelIndex`` cached on ``Model.constraints`` (mirrors the existing ``Variables.label_index``); ``ConstraintBase`` gains ``active_labels()`` and a ``range`` property; ``CSRConstraint`` exposes ``coords``. * ``linopy.common`` gains ``values_to_lookup_array``; the legacy pandas-based helpers ``series_to_lookup_array`` and ``lookup_vals`` are removed. diff --git a/linopy/common.py b/linopy/common.py index b17c391c..1f2a1784 100644 --- a/linopy/common.py +++ b/linopy/common.py @@ -21,6 +21,7 @@ from numpy import arange, nan, signedinteger from polars.datatypes import DataTypeClass from xarray import Coordinates, DataArray, Dataset, apply_ufunc, broadcast +from xarray.core.coordinates import CoordinateValidationError from xarray import align as xr_align from xarray.core import dtypes, indexing from xarray.core.types import JoinOptions, T_Alignable @@ -290,7 +291,10 @@ def as_dataarray( on a shared dim (i.e. value sets that are not equal as sets) are passed through unchanged: downstream xarray alignment decides how to combine them. To enforce that ``arr.dims`` ⊆ ``coords.dims`` and that - shared coord values match, use ``assert_compatible_with_coords``. + shared coord values match, use ``assert_compatible_with_coords`` (called + automatically for ``lower``, ``upper``, and ``mask`` in + :meth:`~linopy.model.Model.add_variables` and for ``mask`` in + :meth:`~linopy.model.Model.add_constraints`). Parameters ---------- @@ -399,6 +403,8 @@ def assert_compatible_with_coords( arr: DataArray, coords: CoordsLike | None, dims: DimsLike | None = None, + *, + label: str | None = None, ) -> None: """ Raise ``ValueError`` if ``arr`` is incompatible with ``coords``. @@ -414,6 +420,8 @@ def assert_compatible_with_coords( ``coords=[[1, 2, 3]], dims=["x"]`` is enforced the same way as ``coords={"x": [1, 2, 3]}``. + ``label`` names the argument in error messages (e.g. ``"lower bound"``). + No-op when ``coords`` is ``None`` or carries no named dimensions. """ if coords is None: @@ -421,9 +429,15 @@ def assert_compatible_with_coords( expected = _coords_to_dict(coords, dims=dims) if not expected: return - extra = set(arr.dims) - set(expected) + subject = label or "Value" + expected_dims = set(expected) + extra = set(arr.dims) - expected_dims if extra: - raise ValueError(f"DataArray has extra dimensions not in coords: {extra}") + raise ValueError( + f"{subject} has dimension(s) {sorted(extra)} not declared in coords " + f"({sorted(expected_dims)}). Add them to coords or remove them from " + f"{subject.lower()}." + ) for dim, coord_values in expected.items(): if dim not in arr.dims: continue @@ -437,11 +451,39 @@ def assert_compatible_with_coords( actual_idx = arr.coords[dim].to_index() if not actual_idx.equals(expected_idx): raise ValueError( - f"Coordinates for dimension '{dim}' do not match: " - f"expected {expected_idx.tolist()}, got {actual_idx.tolist()}" + f"{subject}: coordinate values for dimension {dim!r} do not match " + f"coords — expected {expected_idx.tolist()}, got " + f"{actual_idx.tolist()}." ) +def align_to_coords( + value: Any, + coords: CoordsLike | None, + *, + label: str, + **kwargs: Any, +) -> DataArray: + """ + Convert ``value`` with :func:`as_dataarray` and enforce the coords contract. + + Used by :meth:`~linopy.model.Model.add_variables` for ``lower``, ``upper``, + and ``mask``, and by :meth:`~linopy.model.Model.add_constraints` for + ``mask``. Raises :class:`ValueError` with a message that names ``label`` + when conversion or validation fails. + """ + try: + da = as_dataarray(value, coords, **kwargs) + except (ValueError, TypeError, CoordinateValidationError) as err: + raise ValueError( + f"{label} could not be aligned to coords: {err}" + ) from err + assert_compatible_with_coords( + da, coords, dims=kwargs.get("dims"), label=label + ) + return da + + def _coords_to_dict( coords: Sequence[Sequence | pd.Index] | Mapping, dims: DimsLike | None = None, diff --git a/linopy/model.py b/linopy/model.py index 67ef7a7d..fb2f231d 100644 --- a/linopy/model.py +++ b/linopy/model.py @@ -28,8 +28,8 @@ from linopy import solvers from linopy.common import ( + align_to_coords, as_dataarray, - assert_compatible_with_coords, assign_multiindex_safe, best_int, maybe_replace_signs, @@ -701,7 +701,7 @@ def add_variables( >>> m.add_variables(lower=bad, coords=[pd.Index([0, 1, 2], name="x")], name="v") Traceback (most recent call last): ... - ValueError: DataArray has extra dimensions not in coords: {'extra'} + ValueError: lower bound has dimension(s) ['extra'] not declared in coords ... Strict coords-as-truth: a bound whose shared-dim values don't match raises. @@ -715,7 +715,7 @@ def add_variables( ... ) Traceback (most recent call last): ... - ValueError: Coordinates for dimension 'x' do not match: expected [0, 1, 2], got [10, 20, 30] + ValueError: lower bound: coordinate values for dimension 'x' do not match coords ... Strict coords-as-truth, helpful side: a bound whose coord values match ``coords`` only in a different order is auto-reindexed. @@ -739,7 +739,18 @@ def add_variables( >>> m.add_variables(lower=bad, coords=[[0, 1, 2]], dims=["x"], name="w") Traceback (most recent call last): ... - ValueError: DataArray has extra dimensions not in coords: {'extra'} + ValueError: lower bound has dimension(s) ['extra'] not declared in coords ... + + The same strict contract applies to ``mask`` (including with + ``coords=[[...]], dims=[...]``). + + >>> m = Model() + >>> m.add_variables( + ... mask=bad, coords=[[0, 1, 2]], dims=["x"], name="wm" + ... ) + Traceback (most recent call last): + ... + ValueError: mask has dimension(s) ['extra'] not declared in coords ... """ if name is None: name = f"var{self._varnameCounter}" @@ -765,10 +776,8 @@ def add_variables( "Semi-continuous variables require a positive scalar lower bound." ) - lower_da = as_dataarray(lower, coords, **kwargs) - upper_da = as_dataarray(upper, coords, **kwargs) - assert_compatible_with_coords(lower_da, coords, dims=kwargs.get("dims")) - assert_compatible_with_coords(upper_da, coords, dims=kwargs.get("dims")) + lower_da = align_to_coords(lower, coords, label="lower bound", **kwargs) + upper_da = align_to_coords(upper, coords, label="upper bound", **kwargs) data = Dataset( { "lower": lower_da, @@ -781,8 +790,14 @@ def add_variables( self._check_valid_dim_names(data) if mask is not None: - mask = as_dataarray(mask, data.coords).astype(bool) - assert_compatible_with_coords(mask, data.coords) + if coords is not None: + mask_da = align_to_coords(mask, coords, label="mask", **kwargs) + else: + mask_kw = {k: v for k, v in kwargs.items() if k != "dims"} + mask_da = align_to_coords( + mask, data.coords, label="mask", dims=data.dims, **mask_kw + ) + mask = mask_da.astype(bool).broadcast_like(data.labels) # Auto-mask based on NaN in bounds (use numpy for speed) if self.auto_mask: @@ -1046,8 +1061,8 @@ def add_constraints( (data,) = xr.broadcast(data, exclude=[TERM_DIM]) if mask is not None: - mask = as_dataarray(mask, data.coords).astype(bool) - assert_compatible_with_coords(mask, data.coords) + mask_da = align_to_coords(mask, data.coords, label="mask") + mask = mask_da.astype(bool).broadcast_like(data.labels) # Auto-mask based on null expressions or NaN RHS (use numpy for speed) if self.auto_mask: diff --git a/test/test_common.py b/test/test_common.py index 977e06b2..7585ecf3 100644 --- a/test/test_common.py +++ b/test/test_common.py @@ -18,6 +18,7 @@ from linopy import LinearExpression, Model, Variable from linopy.common import ( align, + align_to_coords, as_dataarray, assert_compatible_with_coords, assign_multiindex_safe, @@ -512,13 +513,13 @@ def test_assert_compatible_with_coords_rejects_extra_dims() -> None: arr = DataArray( [[1, 2], [3, 4]], dims=["a", "b"], coords={"a": [0, 1], "b": [0, 1]} ) - with pytest.raises(ValueError, match="extra dimensions"): + with pytest.raises(ValueError, match=r"not declared in coords"): assert_compatible_with_coords(arr, {"a": [0, 1]}) def test_assert_compatible_with_coords_rejects_value_mismatch() -> None: arr = DataArray([1, 2, 3], dims=["a"], coords={"a": [0, 1, 2]}) - with pytest.raises(ValueError, match="do not match"): + with pytest.raises(ValueError, match="do not match coords"): assert_compatible_with_coords(arr, {"a": [10, 20, 30]}) @@ -528,6 +529,31 @@ def test_assert_compatible_with_coords_allows_subset_dims() -> None: assert_compatible_with_coords(arr, {"a": [0, 1, 2], "b": [10, 20]}) # no raise +def test_assert_compatible_with_coords_unnamed_coords_and_dims() -> None: + """coords=[[...]], dims=[...] enforces the same contract as a named mapping.""" + arr = DataArray([1, 2, 3], dims=["x"], coords={"x": [0, 1, 2]}) + assert_compatible_with_coords(arr, [[0, 1, 2]], dims=["x"]) # no raise + + bad = DataArray( + [[1, 2], [3, 4]], dims=["x", "y"], coords={"x": [0, 1], "y": [0, 1]} + ) + with pytest.raises(ValueError, match=r"not declared in coords"): + assert_compatible_with_coords(bad, [[0, 1]], dims=["x"]) + + +def test_assert_compatible_with_coords_label_in_error() -> None: + arr = DataArray( + [[1, 2], [3, 4]], dims=["a", "b"], coords={"a": [0, 1], "b": [0, 1]} + ) + with pytest.raises(ValueError, match=r"lower bound has dimension\(s\) \['b'\]"): + assert_compatible_with_coords(arr, {"a": [0, 1]}, label="lower bound") + + +def test_align_to_coords_wraps_conversion_errors() -> None: + with pytest.raises(ValueError, match=r"lower bound could not be aligned"): + align_to_coords(np.array([1, 2]), {"x": [0, 1, 2]}, label="lower bound") + + def test_best_int() -> None: # Test for int8 assert best_int(127) == np.int8 diff --git a/test/test_constraints.py b/test/test_constraints.py index e532e5ce..4453b01a 100644 --- a/test/test_constraints.py +++ b/test/test_constraints.py @@ -273,12 +273,12 @@ def test_masked_constraints_broadcast() -> None: dims=["dim_0"], coords={"dim_0": range(5)}, ) - with pytest.raises(ValueError, match="Coordinates for dimension 'dim_0'"): + with pytest.raises(ValueError, match=r"mask: coordinate values for dimension 'dim_0'"): m.add_constraints(1 * x + 10 * y, EQUAL, 0, name="bc3", mask=mask3) # Mask with extra dimension not in data should raise mask4 = xr.DataArray([True, False], dims=["extra_dim"]) - with pytest.raises(ValueError, match="extra dimensions"): + with pytest.raises(ValueError, match=r"mask has dimension\(s\) \['extra_dim'\]"): m.add_constraints(1 * x + 10 * y, EQUAL, 0, name="bc4", mask=mask4) diff --git a/test/test_variable.py b/test/test_variable.py index 7ced26ff..15effbee 100644 --- a/test/test_variable.py +++ b/test/test_variable.py @@ -419,21 +419,36 @@ def test_bound_types_with_coords( ) def test_dataarray_coord_mismatch(self, model: "Model", coords: Any) -> None: lower = DataArray([0, 0, 0], dims=["x"], coords={"x": [0, 1, 2]}) - with pytest.raises(ValueError, match="do not match"): + with pytest.raises(ValueError, match="lower bound.*do not match coords"): model.add_variables(lower=lower, coords=coords, name="x") def test_dataarray_coord_mismatch_upper(self, model: "Model") -> None: upper = DataArray([1, 2, 3], dims=["x"], coords={"x": [10, 20, 30]}) - with pytest.raises(ValueError, match="do not match"): + with pytest.raises(ValueError, match="upper bound.*do not match coords"): model.add_variables(upper=upper, coords=self.SEQ_COORDS, name="x") def test_dataarray_extra_dims(self, model: "Model") -> None: lower = DataArray( [[1, 2], [3, 4], [5, 6]], dims=["x", "y"], coords={"x": [0, 1, 2]} ) - with pytest.raises(ValueError, match="extra dimensions"): + with pytest.raises(ValueError, match=r"lower bound has dimension\(s\) \['y'\]"): model.add_variables(lower=lower, coords=self.DICT_COORDS, name="x") + def test_mask_extra_dims_with_unnamed_coords_and_dims(self, model: "Model") -> None: + """mask is validated against coords + dims= like lower/upper.""" + mask = DataArray( + [[True, False], [True, False], [False, True]], + dims=["x", "extra"], + coords={"x": [0, 1, 2]}, + ) + with pytest.raises(ValueError, match=r"mask has dimension\(s\) \['extra'\]"): + model.add_variables( + mask=mask, + coords=[[0, 1, 2]], + dims=["x"], + name="m", + ) + def test_dataarray_coord_reorder(self, model: "Model") -> None: """A bound whose coords differ only in order is reindexed to coords.""" lower = DataArray([3, 1, 2], dims=["x"], coords={"x": ["c", "a", "b"]}) @@ -479,9 +494,9 @@ def test_positional_bound_wrong_size_raises_clear_error( not a 'coordinates do not match' error. """ coords = [pd.Index(list("abc"), name="x")] - with pytest.raises(Exception, match="conflicting sizes|do not match"): + with pytest.raises(ValueError, match=r"upper bound could not be aligned"): model.add_variables(upper=np.array([1, 2]), coords=coords, name="np_bad") - with pytest.raises(Exception, match="conflicting sizes|do not match"): + with pytest.raises(ValueError, match=r"upper bound could not be aligned"): model.add_variables(upper=pd.Series([1, 2]), coords=coords, name="s_bad") def test_unnamed_coords_short_circuit(self, model: "Model") -> None: @@ -677,7 +692,7 @@ def test_one_dataarray_mismatches_other_ok(self, model: "Model") -> None: """Only the mismatched bound should raise, regardless of the other.""" lower = DataArray([0, 0, 0], dims=["x"], coords={"x": [0, 1, 2]}) upper = DataArray([1, 1], dims=["x"], coords={"x": [10, 20]}) - with pytest.raises(ValueError, match="do not match"): + with pytest.raises(ValueError, match=r"upper bound.*do not match coords"): model.add_variables( lower=lower, upper=upper, coords=self.SEQ_COORDS, name="x" ) @@ -779,7 +794,7 @@ def test_reordered_coords_reindexed(self, model: "Model") -> None: def test_reordered_coords_different_values_raises(self, model: "Model") -> None: """Overlapping but not identical coord sets must still raise.""" lower = DataArray([10, 20], dims=["x"], coords={"x": ["a", "b"]}) - with pytest.raises(ValueError, match="do not match"): + with pytest.raises(ValueError, match=r"lower bound.*do not match coords"): model.add_variables(lower=lower, coords={"x": ["a", "c"]}, name="x") # -- String and datetime coordinates ----------------------------------- @@ -807,7 +822,7 @@ def test_string_coords_mismatch(self, model: "Model") -> None: lower = DataArray( [0, 0], dims=["region"], coords={"region": ["north", "south"]} ) - with pytest.raises(ValueError, match="do not match"): + with pytest.raises(ValueError, match=r"lower bound.*do not match coords"): model.add_variables( lower=lower, coords={"region": ["north", "south", "east"]}, diff --git a/test/test_variables.py b/test/test_variables.py index c16c27ea..de69aca8 100644 --- a/test/test_variables.py +++ b/test/test_variables.py @@ -138,12 +138,12 @@ def test_variables_mask_broadcast() -> None: dims=["dim_0"], coords={"dim_0": range(5)}, ) - with pytest.raises(ValueError, match="Coordinates for dimension 'dim_0'"): + with pytest.raises(ValueError, match=r"mask: coordinate values for dimension 'dim_0'"): m.add_variables(lower, upper, name="z", mask=mask3) # Mask with extra dimension not in data should raise mask4 = xr.DataArray([True, False], dims=["extra_dim"]) - with pytest.raises(ValueError, match="extra dimensions"): + with pytest.raises(ValueError, match=r"mask has dimension\(s\) \['extra_dim'\]"): m.add_variables(lower, upper, name="w", mask=mask4) From 351ae401eb99362f92e7c412d66e24234d2c28c0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 25 May 2026 10:09:28 +0000 Subject: [PATCH 5/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- linopy/common.py | 10 +++------- linopy/model.py | 4 +--- test/test_constraints.py | 4 +++- test/test_variable.py | 2 +- test/test_variables.py | 4 +++- 5 files changed, 11 insertions(+), 13 deletions(-) diff --git a/linopy/common.py b/linopy/common.py index 1f2a1784..9e7c424c 100644 --- a/linopy/common.py +++ b/linopy/common.py @@ -21,9 +21,9 @@ from numpy import arange, nan, signedinteger from polars.datatypes import DataTypeClass from xarray import Coordinates, DataArray, Dataset, apply_ufunc, broadcast -from xarray.core.coordinates import CoordinateValidationError from xarray import align as xr_align from xarray.core import dtypes, indexing +from xarray.core.coordinates import CoordinateValidationError from xarray.core.types import JoinOptions, T_Alignable from xarray.namedarray.utils import is_dict_like @@ -475,12 +475,8 @@ def align_to_coords( try: da = as_dataarray(value, coords, **kwargs) except (ValueError, TypeError, CoordinateValidationError) as err: - raise ValueError( - f"{label} could not be aligned to coords: {err}" - ) from err - assert_compatible_with_coords( - da, coords, dims=kwargs.get("dims"), label=label - ) + raise ValueError(f"{label} could not be aligned to coords: {err}") from err + assert_compatible_with_coords(da, coords, dims=kwargs.get("dims"), label=label) return da diff --git a/linopy/model.py b/linopy/model.py index fb2f231d..8e4421b1 100644 --- a/linopy/model.py +++ b/linopy/model.py @@ -745,9 +745,7 @@ def add_variables( ``coords=[[...]], dims=[...]``). >>> m = Model() - >>> m.add_variables( - ... mask=bad, coords=[[0, 1, 2]], dims=["x"], name="wm" - ... ) + >>> m.add_variables(mask=bad, coords=[[0, 1, 2]], dims=["x"], name="wm") Traceback (most recent call last): ... ValueError: mask has dimension(s) ['extra'] not declared in coords ... diff --git a/test/test_constraints.py b/test/test_constraints.py index 4453b01a..acc41b2e 100644 --- a/test/test_constraints.py +++ b/test/test_constraints.py @@ -273,7 +273,9 @@ def test_masked_constraints_broadcast() -> None: dims=["dim_0"], coords={"dim_0": range(5)}, ) - with pytest.raises(ValueError, match=r"mask: coordinate values for dimension 'dim_0'"): + with pytest.raises( + ValueError, match=r"mask: coordinate values for dimension 'dim_0'" + ): m.add_constraints(1 * x + 10 * y, EQUAL, 0, name="bc3", mask=mask3) # Mask with extra dimension not in data should raise diff --git a/test/test_variable.py b/test/test_variable.py index 15effbee..93665cd5 100644 --- a/test/test_variable.py +++ b/test/test_variable.py @@ -435,7 +435,7 @@ def test_dataarray_extra_dims(self, model: "Model") -> None: model.add_variables(lower=lower, coords=self.DICT_COORDS, name="x") def test_mask_extra_dims_with_unnamed_coords_and_dims(self, model: "Model") -> None: - """mask is validated against coords + dims= like lower/upper.""" + """Mask is validated against coords + dims= like lower/upper.""" mask = DataArray( [[True, False], [True, False], [False, True]], dims=["x", "extra"], diff --git a/test/test_variables.py b/test/test_variables.py index de69aca8..e55ca680 100644 --- a/test/test_variables.py +++ b/test/test_variables.py @@ -138,7 +138,9 @@ def test_variables_mask_broadcast() -> None: dims=["dim_0"], coords={"dim_0": range(5)}, ) - with pytest.raises(ValueError, match=r"mask: coordinate values for dimension 'dim_0'"): + with pytest.raises( + ValueError, match=r"mask: coordinate values for dimension 'dim_0'" + ): m.add_variables(lower, upper, name="z", mask=mask3) # Mask with extra dimension not in data should raise From 9fa18efd5b29d62c99c8bdd7b2ecc313433469bc Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Mon, 25 May 2026 12:39:11 +0200 Subject: [PATCH 6/7] refactor(model): simplify mask align; preserve TypeError in align_to_coords Three cleanups on top of align_to_coords: - Drop the trailing ``.broadcast_like(data.labels)`` in ``add_variables`` and ``add_constraints`` mask paths. ``as_dataarray`` already expands missing dims to ``coords`` shape, so the broadcast was a no-op. - Stop overriding the caller's ``dims=`` in the ``add_variables`` mask path when ``coords is None``. The previous code stripped ``dims`` and forced ``dims=data.dims``; with ``data.coords`` being an xarray ``Coordinates`` with already-named dims, the user's ``dims`` is harmless to forward and the override was just hiding intent. Mask now goes through one ``align_to_coords`` call regardless of whether ``coords`` is supplied. - Split the exception handler in ``align_to_coords``: ``TypeError`` from unsupported input types is re-raised as ``TypeError`` (still labeled), while ``ValueError`` / ``CoordinateValidationError`` stay ``ValueError``. Preserves the original type signature for callers that want to ``except TypeError``. New test ``test_align_to_coords_preserves_type_errors`` pins the TypeError pass-through. Suite: 3703 passed. Co-Authored-By: Claude Opus 4.7 (1M context) --- linopy/common.py | 4 +++- linopy/model.py | 17 +++++++---------- test/test_common.py | 6 ++++++ 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/linopy/common.py b/linopy/common.py index 9e7c424c..44128940 100644 --- a/linopy/common.py +++ b/linopy/common.py @@ -474,7 +474,9 @@ def align_to_coords( """ try: da = as_dataarray(value, coords, **kwargs) - except (ValueError, TypeError, CoordinateValidationError) as err: + except TypeError as err: + raise TypeError(f"{label} could not be aligned to coords: {err}") from err + except (ValueError, CoordinateValidationError) as err: raise ValueError(f"{label} could not be aligned to coords: {err}") from err assert_compatible_with_coords(da, coords, dims=kwargs.get("dims"), label=label) return da diff --git a/linopy/model.py b/linopy/model.py index 8e4421b1..01708f49 100644 --- a/linopy/model.py +++ b/linopy/model.py @@ -788,14 +788,12 @@ def add_variables( self._check_valid_dim_names(data) if mask is not None: - if coords is not None: - mask_da = align_to_coords(mask, coords, label="mask", **kwargs) - else: - mask_kw = {k: v for k, v in kwargs.items() if k != "dims"} - mask_da = align_to_coords( - mask, data.coords, label="mask", dims=data.dims, **mask_kw - ) - mask = mask_da.astype(bool).broadcast_like(data.labels) + mask = align_to_coords( + mask, + coords if coords is not None else data.coords, + label="mask", + **kwargs, + ).astype(bool) # Auto-mask based on NaN in bounds (use numpy for speed) if self.auto_mask: @@ -1059,8 +1057,7 @@ def add_constraints( (data,) = xr.broadcast(data, exclude=[TERM_DIM]) if mask is not None: - mask_da = align_to_coords(mask, data.coords, label="mask") - mask = mask_da.astype(bool).broadcast_like(data.labels) + mask = align_to_coords(mask, data.coords, label="mask").astype(bool) # Auto-mask based on null expressions or NaN RHS (use numpy for speed) if self.auto_mask: diff --git a/test/test_common.py b/test/test_common.py index 7585ecf3..a0f9526d 100644 --- a/test/test_common.py +++ b/test/test_common.py @@ -554,6 +554,12 @@ def test_align_to_coords_wraps_conversion_errors() -> None: align_to_coords(np.array([1, 2]), {"x": [0, 1, 2]}, label="lower bound") +def test_align_to_coords_preserves_type_errors() -> None: + """Unsupported input types stay TypeError (don't become ValueError).""" + with pytest.raises(TypeError, match=r"lower bound could not be aligned"): + align_to_coords(lambda x: x, {"x": [0, 1, 2]}, label="lower bound") + + def test_best_int() -> None: # Test for int8 assert best_int(127) == np.int8 From 5a4c992ea293eead0f20de64ffca3a3066ea8ee7 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 27 May 2026 12:32:48 +0200 Subject: [PATCH 7/7] refactor: rename assert_compatible_with_coords to validate_alignment Per PR review: align on the project's `validate_*` naming convention and remove the implicit "AssertionError" connotation of `assert_*`. Pairs naturally with `align_to_coords`. Co-Authored-By: Claude Opus 4.7 (1M context) --- doc/release_notes.rst | 2 +- linopy/common.py | 8 ++++---- test/test_common.py | 24 ++++++++++++------------ 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/doc/release_notes.rst b/doc/release_notes.rst index 13763e5f..7dd6dd3c 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -69,7 +69,7 @@ Most users should keep calling ``model.solve(...)``. If you want more control, y * ``linopy.common.as_dataarray`` is now the single broadcasting primitive; strict subset-dim / coord-value checks live in - ``assert_compatible_with_coords`` (via ``align_to_coords`` in + ``validate_alignment`` (via ``align_to_coords`` in ``add_variables`` / ``add_constraints``). Validation errors name the argument (``lower bound``, ``upper bound``, ``mask``) and explain whether dimensions or coordinate values disagree with ``coords``. When ``coords`` is diff --git a/linopy/common.py b/linopy/common.py index 238e22e3..58b34ed7 100644 --- a/linopy/common.py +++ b/linopy/common.py @@ -291,7 +291,7 @@ def as_dataarray( on a shared dim (i.e. value sets that are not equal as sets) are passed through unchanged: downstream xarray alignment decides how to combine them. To enforce that ``arr.dims`` ⊆ ``coords.dims`` and that - shared coord values match, use ``assert_compatible_with_coords`` (called + shared coord values match, use ``validate_alignment`` (called automatically for ``lower``, ``upper``, and ``mask`` in :meth:`~linopy.model.Model.add_variables` and for ``mask`` in :meth:`~linopy.model.Model.add_constraints`). @@ -366,7 +366,7 @@ def as_dataarray( # Different value sets are left alone: downstream xarray alignment # (e.g. xr.align in arithmetic) handles them. Callers needing strict # value matching (add_variables / add_constraints) should use - # ``assert_compatible_with_coords`` after this call. + # ``validate_alignment`` after this call. if len(actual_idx) == len(expected_idx) and set(actual_idx) == set( expected_idx ): @@ -399,7 +399,7 @@ def as_dataarray( return arr -def assert_compatible_with_coords( +def validate_alignment( arr: DataArray, coords: CoordsLike | None, dims: DimsLike | None = None, @@ -486,7 +486,7 @@ def align_to_coords( raise TypeError(f"{label} could not be aligned to coords: {err}") from err except (ValueError, CoordinateValidationError) as err: raise ValueError(f"{label} could not be aligned to coords: {err}") from err - assert_compatible_with_coords(da, coords, dims=kwargs.get("dims"), label=label) + validate_alignment(da, coords, dims=kwargs.get("dims"), label=label) return da diff --git a/test/test_common.py b/test/test_common.py index a0f9526d..a9e84bf3 100644 --- a/test/test_common.py +++ b/test/test_common.py @@ -20,13 +20,13 @@ align, align_to_coords, as_dataarray, - assert_compatible_with_coords, assign_multiindex_safe, best_int, get_dims_with_index_levels, is_constant, iterate_slices, maybe_group_terms_polars, + validate_alignment, ) from linopy.testing import assert_linequal, assert_varequal from linopy.types import CoordsLike @@ -509,44 +509,44 @@ def test_as_dataarray_keeps_disjoint_shared_dim_values() -> None: assert list(da.coords["a"].values) == [0, 1, 2, 3, 4] -def test_assert_compatible_with_coords_rejects_extra_dims() -> None: +def test_validate_alignment_rejects_extra_dims() -> None: arr = DataArray( [[1, 2], [3, 4]], dims=["a", "b"], coords={"a": [0, 1], "b": [0, 1]} ) with pytest.raises(ValueError, match=r"not declared in coords"): - assert_compatible_with_coords(arr, {"a": [0, 1]}) + validate_alignment(arr, {"a": [0, 1]}) -def test_assert_compatible_with_coords_rejects_value_mismatch() -> None: +def test_validate_alignment_rejects_value_mismatch() -> None: arr = DataArray([1, 2, 3], dims=["a"], coords={"a": [0, 1, 2]}) with pytest.raises(ValueError, match="do not match coords"): - assert_compatible_with_coords(arr, {"a": [10, 20, 30]}) + validate_alignment(arr, {"a": [10, 20, 30]}) -def test_assert_compatible_with_coords_allows_subset_dims() -> None: +def test_validate_alignment_allows_subset_dims() -> None: """arr.dims ⊂ coords.dims is fine (broadcasting fills the missing dim).""" arr = DataArray([1, 2, 3], dims=["a"], coords={"a": [0, 1, 2]}) - assert_compatible_with_coords(arr, {"a": [0, 1, 2], "b": [10, 20]}) # no raise + validate_alignment(arr, {"a": [0, 1, 2], "b": [10, 20]}) # no raise -def test_assert_compatible_with_coords_unnamed_coords_and_dims() -> None: +def test_validate_alignment_unnamed_coords_and_dims() -> None: """coords=[[...]], dims=[...] enforces the same contract as a named mapping.""" arr = DataArray([1, 2, 3], dims=["x"], coords={"x": [0, 1, 2]}) - assert_compatible_with_coords(arr, [[0, 1, 2]], dims=["x"]) # no raise + validate_alignment(arr, [[0, 1, 2]], dims=["x"]) # no raise bad = DataArray( [[1, 2], [3, 4]], dims=["x", "y"], coords={"x": [0, 1], "y": [0, 1]} ) with pytest.raises(ValueError, match=r"not declared in coords"): - assert_compatible_with_coords(bad, [[0, 1]], dims=["x"]) + validate_alignment(bad, [[0, 1]], dims=["x"]) -def test_assert_compatible_with_coords_label_in_error() -> None: +def test_validate_alignment_label_in_error() -> None: arr = DataArray( [[1, 2], [3, 4]], dims=["a", "b"], coords={"a": [0, 1], "b": [0, 1]} ) with pytest.raises(ValueError, match=r"lower bound has dimension\(s\) \['b'\]"): - assert_compatible_with_coords(arr, {"a": [0, 1]}, label="lower bound") + validate_alignment(arr, {"a": [0, 1]}, label="lower bound") def test_align_to_coords_wraps_conversion_errors() -> None: