diff --git a/doc/release_notes.rst b/doc/release_notes.rst index 1ffea106..7dd6dd3c 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -53,7 +53,7 @@ Most users should keep calling ``model.solve(...)``. If you want more control, y **Bug Fixes** * ``Model.add_variables``: 0.7.0 made ``coords`` (dims, order, and values) the source of truth for ``DataArray`` bounds; this release closes the two remaining gaps. Pandas ``Series`` / ``DataFrame`` bounds missing a dimension are broadcast to ``coords`` instead of being silently dropped (`#709 `__), and the variable's dimension order always follows ``coords`` regardless of bound type (`#706 `__). -* ``add_variables`` / ``add_constraints``: using `coords` as the source of truth now applies to ``mask``s too — pandas ``Series`` / ``DataFrame`` masks missing a dimension are broadcast to the variable/constraint shape. As previously announced via ``FutureWarning``, masks whose coordinates are a sparse subset of the data's coordinates now raise ``ValueError`` rather than silently filling missing entries with ``False``; masks with dimensions not in the data raise ``ValueError`` instead of ``AssertionError``. +* ``add_variables`` / ``add_constraints``: the same rule now applies to ``mask`` — pandas ``Series`` / ``DataFrame`` masks missing a dimension are broadcast to the variable/constraint shape. As previously announced via ``FutureWarning``, masks whose coordinates are a sparse subset of the data's coordinates now raise ``ValueError`` rather than silently filling missing entries with ``False``; masks with dimensions not in the data raise ``ValueError`` instead of ``AssertionError``. * ``add_piecewise_formulation`` now produces a reproducible dimension order in the broadcast breakpoint array. The previous set-based expansion gave a hash-randomized order that varied between processes. * SOS constraints on masked variables no longer cause solver-specific failures (Gurobi ``IndexError``, Xpress ``?404 Invalid column number``, LP parse errors, silent set corruption). ``Model.solve()`` and ``Model.to_file()`` now raise a clear ``NotImplementedError`` referring users to `#688 `__; pass ``reformulate_sos=True`` as a workaround. * ``Model.solve(..., reformulate_sos=True)`` now actually reformulates SOS constraints even when the solver supports them natively. Previously it was silently ignored with a warning. @@ -67,6 +67,14 @@ Most users should keep calling ``model.solve(...)``. If you want more control, y **Internal** +* ``linopy.common.as_dataarray`` is now the single broadcasting primitive; + strict subset-dim / coord-value checks live in + ``validate_alignment`` (via ``align_to_coords`` in + ``add_variables`` / ``add_constraints``). Validation errors name the + argument (``lower bound``, ``upper bound``, ``mask``) and explain whether + dimensions or coordinate values disagree with ``coords``. When ``coords`` is + a mapping, extra keys beyond the positional ``dims`` are broadcast in rather + than dropped. * Each ``Solver`` subclass now overrides at most three hooks: ``_build_direct`` (build the native model), ``_run_direct`` (run it), and ``_run_file`` (run the solver on an LP/MPS file). File-only solvers (CBC, GLPK, CPLEX, SCIP, Knitro, COPT, MindOpt) only override ``_run_file``. * New ``ConstraintLabelIndex`` cached on ``Model.constraints`` (mirrors the existing ``Variables.label_index``); ``ConstraintBase`` gains ``active_labels()`` and a ``range`` property; ``CSRConstraint`` exposes ``coords``. * ``linopy.common`` gains ``values_to_lookup_array``; the legacy pandas-based helpers ``series_to_lookup_array`` and ``lookup_vals`` are removed. diff --git a/linopy/common.py b/linopy/common.py index 6ca79a9e..58b34ed7 100644 --- a/linopy/common.py +++ b/linopy/common.py @@ -23,6 +23,7 @@ from xarray import Coordinates, DataArray, Dataset, apply_ufunc, broadcast from xarray import align as xr_align from xarray.core import dtypes, indexing +from xarray.core.coordinates import CoordinateValidationError from xarray.core.types import JoinOptions, T_Alignable from xarray.namedarray.utils import is_dict_like @@ -213,30 +214,20 @@ def numpy_to_dataarray( return DataArray(arr, coords=coords, dims=dims, **kwargs) -def as_dataarray( +def _as_dataarray_lax( arr: Any, coords: CoordsLike | None = None, dims: DimsLike | None = None, **kwargs: Any, ) -> DataArray: """ - Convert an object to a DataArray. + Type-dispatched DataArray conversion without any coords validation. - Parameters - ---------- - arr: - The input object. - coords (Union[dict, list, None]): - The coordinates for the DataArray. If None, default coordinates will be used. - dims (Union[list, None]): - The dimensions for the DataArray. If None, the dimensions will be automatically generated. - **kwargs: - Additional keyword arguments to be passed to the DataArray constructor. - - Returns - ------- - DataArray: - The converted DataArray. + This is the conversion primitive used by ``as_dataarray``: it picks the + right constructor for each supported input type but does not check the + result against ``coords``. Callers that need ``coords`` to govern the + output (dim order, shared-dim values, missing-dim expansion) should use + ``as_dataarray`` instead. """ if isinstance(arr, pd.Series | pd.DataFrame): arr = pandas_to_dataarray(arr, coords=coords, dims=dims, **kwargs) @@ -275,97 +266,61 @@ def as_dataarray( return arr -def _coords_to_dict( - coords: Sequence[Sequence | pd.Index] | Mapping, -) -> dict[str, Any]: - """ - Normalize coords to a dict mapping dim names to coordinate values. - - Entries must be ``pd.Index`` (named or not) or unnamed sequences - (``list`` / ``tuple`` / ``range`` / ``np.ndarray``). A - ``pd.MultiIndex`` must have ``.name`` set — xarray requires a - single dimension name for the flattened index. Other types — - notably ``xarray.DataArray`` — raise ``TypeError``; callers should - convert via ``variable.indexes[]`` first. - """ - if isinstance(coords, Mapping): - return dict(coords) - result: dict[str, Any] = {} - for c in coords: - if isinstance(c, pd.MultiIndex): - if not c.name: - raise TypeError( - "MultiIndex coords entries must have .name set so " - "xarray can use it as the dimension name. Set it via " - "`idx.name = 'my_dim'` before passing to coords." - ) - result[c.name] = c - elif isinstance(c, pd.Index): - if c.name: - result[c.name] = c - elif isinstance(c, list | tuple | range | np.ndarray): - pass - else: - raise TypeError( - f"coords entries must be pd.Index or an unnamed sequence " - f"(list / tuple / range / numpy.ndarray); got " - f"{type(c).__name__}. For an xarray DataArray coord, pass " - f"`variable.indexes[]` (a pd.Index) instead." - ) - return result - - -def _named_pandas_to_dataarray(arr: pd.Series | pd.DataFrame) -> DataArray | None: - """ - Convert a pandas Series or DataFrame with fully named axes to a DataArray. - - Returns ``None`` if any axis (or MultiIndex level) is unnamed or - non-string, so the caller can fall back to ``as_dataarray``. - """ - names = list(arr.index.names) - if isinstance(arr, pd.DataFrame): - names += list(arr.columns.names) - if any(not isinstance(n, str) for n in names): - return None - - if isinstance(arr, pd.DataFrame): - if isinstance(arr.index, pd.MultiIndex) or isinstance( - arr.columns, pd.MultiIndex - ): - arr = arr.stack(list(range(arr.columns.nlevels)), future_stack=True) - return arr.to_xarray() - return DataArray(arr) - - return arr.to_xarray() - - -def as_dataarray_in_coords(arr: Any, coords: Any, **kwargs: Any) -> DataArray: +def as_dataarray( + arr: Any, + coords: CoordsLike | None = None, + dims: DimsLike | None = None, + **kwargs: Any, +) -> DataArray: """ - Coerce ``arr`` into a DataArray that matches ``coords``. + Convert ``arr`` to a DataArray and broadcast it against ``coords``. + + When ``coords`` carries named dimensions, the result is aligned with + those coords: + + - positional inputs (numpy, polars, unnamed pandas, scalar) are labeled + with the coord dim names by position; + - for every dim shared between ``arr`` and ``coords``, same-values- + different-order coordinates are reindexed to ``coords`` order; + - dims present in ``coords`` but not in ``arr`` are expanded to the + ``coords`` shape; + - the result is transposed to ``coords`` order. + + Dimensions present in ``arr`` but not in ``coords`` are preserved so + standard xarray broadcasting keeps working. Disagreeing coord values + on a shared dim (i.e. value sets that are not equal as sets) are + passed through unchanged: downstream xarray alignment decides how to + combine them. To enforce that ``arr.dims`` ⊆ ``coords.dims`` and that + shared coord values match, use ``validate_alignment`` (called + automatically for ``lower``, ``upper``, and ``mask`` in + :meth:`~linopy.model.Model.add_variables` and for ``mask`` in + :meth:`~linopy.model.Model.add_constraints`). - Strict-coords counterpart to ``as_dataarray``: ``coords`` is the - source of truth, so the returned DataArray has the dimensions, - dimension order, and coordinate values of ``coords``, regardless - of the input type. Pandas inputs with fully named axes are - converted via ``to_xarray`` so their axis names map to dimensions; - scalars, numpy arrays, and unnamed pandas go through - ``as_dataarray``. The result is then validated, expanded over - missing dims, and transposed; ``expand_dims`` and ``transpose`` - are no-ops when the array already matches. + Parameters + ---------- + arr + Input scalar / list / numpy / polars / pandas / DataArray. + coords + Mapping of dim name → coord values, or a sequence of ``pd.Index`` + / unnamed sequences. ``None`` falls back to xarray's default + labeling (no broadcasting). + dims + Optional dim-names hint, used for positional inputs and to bias + pandas-axis interpretation. + **kwargs + Forwarded to the underlying DataArray construction. - - Raises ``ValueError`` if the input has dimensions not in - ``coords``. - - Raises ``ValueError`` if shared dimension coordinates differ in - values. Same-values-different-order coordinates are reindexed. + Returns + ------- + DataArray + Broadcast against ``coords`` (extra dims preserved). """ if coords is None: - return as_dataarray(arr, coords, **kwargs) + return _as_dataarray_lax(arr, coords, dims, **kwargs) - expected = _coords_to_dict(coords) + expected = _coords_to_dict(coords, dims=dims) if not expected: - return as_dataarray(arr, coords, **kwargs) - - orig_type_name = type(arr).__name__ + return _as_dataarray_lax(arr, coords, dims, **kwargs) if isinstance(arr, pd.Series | pd.DataFrame): converted = _named_pandas_to_dataarray(arr) @@ -375,16 +330,17 @@ def as_dataarray_in_coords(arr: Any, coords: Any, **kwargs: Any) -> DataArray: if not isinstance(arr, DataArray): # numpy/polars/unnamed-pandas inputs are positional — their only # meaningful information is the values; any axis labels are - # auto-generated. Default dims to coords' keys so as_dataarray + # auto-generated. Default dims to coords' keys so the lax conversion # labels axes correctly (instead of dim_0/dim_1), then re-assign # coords from expected so positional inputs align to coords by # position. A shape mismatch surfaces here as a clear xarray # "conflicting sizes" error rather than a confusing # "coordinates do not match" further down. - kwargs.setdefault("dims", list(expected)) - arr = as_dataarray(arr, coords, **kwargs) + if dims is None: + dims = list(expected) + arr = _as_dataarray_lax(arr, coords, dims=dims, **kwargs) # Skip MultiIndex dims — re-assigning a PandasMultiIndex coord emits - # a FutureWarning and isn't needed (as_dataarray already used it). + # a FutureWarning and isn't needed (the lax pass already used it). arr = arr.assign_coords( { d: expected[d] @@ -393,23 +349,10 @@ def as_dataarray_in_coords(arr: Any, coords: Any, **kwargs: Any) -> DataArray: } ) - extra = set(arr.dims) - set(expected) - if extra: - raise ValueError( - f"{orig_type_name} has extra dimensions not in coords: {extra}" - ) - for dim, coord_values in expected.items(): if dim not in arr.dims: continue - expected_is_mi = isinstance(coord_values, pd.MultiIndex) - actual_is_mi = isinstance(arr.indexes.get(dim), pd.MultiIndex) - if expected_is_mi or actual_is_mi: - if expected_is_mi and actual_is_mi: - if not arr.indexes[dim].equals(coord_values): - raise ValueError( - f"MultiIndex for dimension '{dim}' does not match coords" - ) + if isinstance(arr.indexes.get(dim), pd.MultiIndex): continue expected_idx = ( coord_values @@ -417,16 +360,17 @@ def as_dataarray_in_coords(arr: Any, coords: Any, **kwargs: Any) -> DataArray: else pd.Index(coord_values) ) actual_idx = arr.coords[dim].to_index() - if not actual_idx.equals(expected_idx): - if len(actual_idx) == len(expected_idx) and set(actual_idx) == set( - expected_idx - ): - arr = arr.reindex({dim: expected_idx}) - else: - raise ValueError( - f"Coordinates for dimension '{dim}' do not match: " - f"expected {expected_idx.tolist()}, got {actual_idx.tolist()}" - ) + if actual_idx.equals(expected_idx): + continue + # Same values, different order → reindex to match expected order. + # Different value sets are left alone: downstream xarray alignment + # (e.g. xr.align in arithmetic) handles them. Callers needing strict + # value matching (add_variables / add_constraints) should use + # ``validate_alignment`` after this call. + if len(actual_idx) == len(expected_idx) and set(actual_idx) == set( + expected_idx + ): + arr = arr.reindex({dim: expected_idx}) # expand_dims prepends new dimensions and their coordinate variables; # the subsequent transpose restores coords order. Both are no-ops when @@ -455,6 +399,184 @@ def as_dataarray_in_coords(arr: Any, coords: Any, **kwargs: Any) -> DataArray: return arr +def validate_alignment( + arr: DataArray, + coords: CoordsLike | None, + dims: DimsLike | None = None, + *, + label: str | None = None, +) -> None: + """ + Raise ``ValueError`` if ``arr`` is incompatible with ``coords``. + + ``arr`` is compatible with ``coords`` when both of the following hold: + + - every dim in ``arr.dims`` is also a dim in ``coords`` (no extras); + - for every dim shared between ``arr`` and ``coords``, the coord + values are equal. + + ``dims`` mirrors the ``dims`` argument of ``as_dataarray``: it names + unnamed entries in a sequence-form ``coords`` by position, so + ``coords=[[1, 2, 3]], dims=["x"]`` is enforced the same way as + ``coords={"x": [1, 2, 3]}``. + + ``label`` names the argument in error messages (e.g. ``"lower bound"``). + + No-op when ``coords`` is ``None`` or carries no named dimensions. + """ + if coords is None: + return + expected = _coords_to_dict(coords, dims=dims) + if not expected: + return + subject = label or "Value" + expected_dims = set(expected) + extra = set(arr.dims) - expected_dims + if extra: + raise ValueError( + f"{subject} has dimension(s) {sorted(extra)} not declared in coords " + f"({sorted(expected_dims)}). Add them to coords or remove them from " + f"{subject.lower()}." + ) + for dim, coord_values in expected.items(): + if dim not in arr.dims: + continue + expected_is_mi = isinstance(coord_values, pd.MultiIndex) + actual_is_mi = isinstance(arr.indexes.get(dim), pd.MultiIndex) + if expected_is_mi or actual_is_mi: + if expected_is_mi and actual_is_mi: + if not arr.indexes[dim].equals(coord_values): + raise ValueError( + f"{subject}: MultiIndex for dimension {dim!r} does not " + f"match coords." + ) + continue + expected_idx = ( + coord_values + if isinstance(coord_values, pd.Index) + else pd.Index(coord_values) + ) + actual_idx = arr.coords[dim].to_index() + if not actual_idx.equals(expected_idx): + raise ValueError( + f"{subject}: coordinate values for dimension {dim!r} do not match " + f"coords — expected {expected_idx.tolist()}, got " + f"{actual_idx.tolist()}." + ) + + +def align_to_coords( + value: Any, + coords: CoordsLike | None, + *, + label: str, + **kwargs: Any, +) -> DataArray: + """ + Convert ``value`` with :func:`as_dataarray` and enforce the coords contract. + + Used by :meth:`~linopy.model.Model.add_variables` for ``lower``, ``upper``, + and ``mask``, and by :meth:`~linopy.model.Model.add_constraints` for + ``mask``. Raises :class:`ValueError` with a message that names ``label`` + when conversion or validation fails. + """ + try: + da = as_dataarray(value, coords, **kwargs) + except TypeError as err: + raise TypeError(f"{label} could not be aligned to coords: {err}") from err + except (ValueError, CoordinateValidationError) as err: + raise ValueError(f"{label} could not be aligned to coords: {err}") from err + validate_alignment(da, coords, dims=kwargs.get("dims"), label=label) + return da + + +def _coords_to_dict( + coords: Sequence[Sequence | pd.Index] | Mapping, + dims: DimsLike | None = None, +) -> dict[str, Any]: + """ + Normalize coords to a dict mapping dim names to coordinate values. + + For ``xarray.Coordinates`` (and ``DataArray.coords``), only entries + that are actual dimensions are kept; derived MultiIndex level coords + are dropped here and re-attached by xarray downstream. Plain mappings + are returned as-is. For sequence inputs, entries must be ``pd.Index`` + (named or not) or unnamed sequences (``list`` / ``tuple`` / ``range`` + / ``np.ndarray``). A ``pd.MultiIndex`` must have ``.name`` set — + xarray requires a single dimension name for the flattened index. + Other types — notably ``xarray.DataArray`` — raise ``TypeError`` + rather than being silently dropped: callers should convert via + ``variable.indexes[]`` (or ``pd.Index(...)``) first. + + Unnamed sequence entries (or unnamed ``pd.Index``) gain a dim name + from ``dims`` by position when ``dims`` is provided, so callers that + pass ``coords=[[1, 2, 3]], dims=["x"]`` get the same strict + enforcement as ``coords={"x": [1, 2, 3]}``. + """ + if isinstance(coords, Coordinates): + # Coordinates iterates over every coord variable, including + # MultiIndex level coords. Keep only the entries that are dims. + return {d: coords[d] for d in coords.dims if d in coords} + if isinstance(coords, Mapping): + return dict(coords) + dim_names: list[Any] | None = None + if dims is not None: + dim_names = list(dims) if isinstance(dims, list | tuple) else [dims] + result: dict[str, Any] = {} + for i, c in enumerate(coords): + if isinstance(c, pd.MultiIndex): + if not c.name: + raise TypeError( + "MultiIndex coords entries must have .name set so " + "xarray can use it as the dimension name. Set it via " + "`idx.name = 'my_dim'` before passing to coords." + ) + result[c.name] = c + elif isinstance(c, pd.Index): + name = ( + c.name + if c.name + else (dim_names[i] if dim_names and i < len(dim_names) else None) + ) + if name is not None: + result[name] = c + elif isinstance(c, list | tuple | range | np.ndarray): + if dim_names and i < len(dim_names): + result[dim_names[i]] = pd.Index(c, name=dim_names[i]) + else: + raise TypeError( + f"coords entries must be pd.Index or an unnamed sequence " + f"(list / tuple / range / numpy.ndarray); got " + f"{type(c).__name__}. For an xarray DataArray coord, pass " + f"`variable.indexes[]` (a pd.Index) instead." + ) + return result + + +def _named_pandas_to_dataarray(arr: pd.Series | pd.DataFrame) -> DataArray | None: + """ + Convert a pandas Series or DataFrame with fully named axes to a DataArray. + + Returns ``None`` if any axis (or MultiIndex level) is unnamed or + non-string, so the caller can fall back to ``as_dataarray``. + """ + names = list(arr.index.names) + if isinstance(arr, pd.DataFrame): + names += list(arr.columns.names) + if any(not isinstance(n, str) for n in names): + return None + + if isinstance(arr, pd.DataFrame): + if isinstance(arr.index, pd.MultiIndex) or isinstance( + arr.columns, pd.MultiIndex + ): + arr = arr.stack(list(range(arr.columns.nlevels)), future_stack=True) + return arr.to_xarray() + return DataArray(arr) + + return arr.to_xarray() + + def broadcast_mask(mask: DataArray, labels: DataArray) -> DataArray: """ Broadcast a boolean mask to match the shape of labels. diff --git a/linopy/model.py b/linopy/model.py index 669ae11d..01708f49 100644 --- a/linopy/model.py +++ b/linopy/model.py @@ -28,8 +28,8 @@ from linopy import solvers from linopy.common import ( + align_to_coords, as_dataarray, - as_dataarray_in_coords, assign_multiindex_safe, best_int, maybe_replace_signs, @@ -615,14 +615,27 @@ def add_variables( upper : TYPE, optional Upper bound of the variable(s). Ignored if `binary` is True. The default is inf. - coords : list/xarray.Coordinates, optional - The coords of the variable array. When provided, ``coords`` + coords : list/dict/xarray.Coordinates, optional + The coords of the variable array. When provided with **named + dimensions** (a ``Mapping``, ``xarray.Coordinates``, a + sequence of named ``pd.Index`` objects, or an unnamed + sequence paired with ``dims=`` in ``**kwargs``), ``coords`` is the source of truth for the variable's dimensions, - dimension order, and coordinate values; ``lower`` and - ``upper`` are broadcast and aligned to match. One - optimization variable is added per combination of - coordinates. The default is None, in which case the shape - is inferred from the bounds. + order, and values. ``lower``, ``upper`` and ``mask`` are + aligned to this contract: + + - dims of every bound must be a subset of ``coords.dims``; + extra dims raise ``ValueError``; + - dim order in the variable always follows ``coords``; + - shared-dim coordinate values must equal ``coords``; same + values in a different order are auto-reindexed, different + value sets raise ``ValueError``; + - dims listed in ``coords`` but missing from a bound are + broadcast to ``coords`` shape. + + One optimization variable is added per combination of + coordinates. The default is ``None``, in which case the + shape is inferred from the bounds. name : str, optional Reference name of the added variables. The default None results in a name like "var1", "var2" etc. @@ -675,6 +688,67 @@ def add_variables( [7]: x[7] ∈ [0, inf] [8]: x[8] ∈ [0, inf] [9]: x[9] ∈ [0, inf] + + Strict coords-as-truth: a bound with an extra dim raises. + + >>> import xarray as xr + >>> m = Model() + >>> bad = xr.DataArray( + ... [[1.0, 2.0, 3.0]] * 2, + ... dims=["extra", "x"], + ... coords={"x": [0, 1, 2]}, + ... ) + >>> m.add_variables(lower=bad, coords=[pd.Index([0, 1, 2], name="x")], name="v") + Traceback (most recent call last): + ... + ValueError: lower bound has dimension(s) ['extra'] not declared in coords ... + + Strict coords-as-truth: a bound whose shared-dim values don't + match raises. + + >>> m = Model() + >>> wrong = xr.DataArray( + ... [1.0, 2.0, 3.0], dims=["x"], coords={"x": [10, 20, 30]} + ... ) + >>> m.add_variables( + ... lower=wrong, coords=[pd.Index([0, 1, 2], name="x")], name="v" + ... ) + Traceback (most recent call last): + ... + ValueError: lower bound: coordinate values for dimension 'x' do not match coords ... + + Strict coords-as-truth, helpful side: a bound whose coord values + match ``coords`` only in a different order is auto-reindexed. + + >>> m = Model() + >>> reordered = xr.DataArray( + ... [3.0, 1.0, 2.0], dims=["x"], coords={"x": ["c", "a", "b"]} + ... ) + >>> v = m.add_variables( + ... lower=reordered, + ... coords=[pd.Index(["a", "b", "c"], name="x")], + ... name="r", + ... ) + >>> list(v.data.lower.values) + [1.0, 2.0, 3.0] + + Unnamed-coords sequence + ``dims=`` opts into the same strict + enforcement as a named index — extra dims still raise. + + >>> m = Model() + >>> m.add_variables(lower=bad, coords=[[0, 1, 2]], dims=["x"], name="w") + Traceback (most recent call last): + ... + ValueError: lower bound has dimension(s) ['extra'] not declared in coords ... + + The same strict contract applies to ``mask`` (including with + ``coords=[[...]], dims=[...]``). + + >>> m = Model() + >>> m.add_variables(mask=bad, coords=[[0, 1, 2]], dims=["x"], name="wm") + Traceback (most recent call last): + ... + ValueError: mask has dimension(s) ['extra'] not declared in coords ... """ if name is None: name = f"var{self._varnameCounter}" @@ -700,10 +774,12 @@ def add_variables( "Semi-continuous variables require a positive scalar lower bound." ) + lower_da = align_to_coords(lower, coords, label="lower bound", **kwargs) + upper_da = align_to_coords(upper, coords, label="upper bound", **kwargs) data = Dataset( { - "lower": as_dataarray_in_coords(lower, coords, **kwargs), - "upper": as_dataarray_in_coords(upper, coords, **kwargs), + "lower": lower_da, + "upper": upper_da, "labels": -1, } ) @@ -712,7 +788,12 @@ def add_variables( self._check_valid_dim_names(data) if mask is not None: - mask = as_dataarray_in_coords(mask, data.coords).astype(bool) + mask = align_to_coords( + mask, + coords if coords is not None else data.coords, + label="mask", + **kwargs, + ).astype(bool) # Auto-mask based on NaN in bounds (use numpy for speed) if self.auto_mask: @@ -976,7 +1057,7 @@ def add_constraints( (data,) = xr.broadcast(data, exclude=[TERM_DIM]) if mask is not None: - mask = as_dataarray_in_coords(mask, data.coords).astype(bool) + mask = align_to_coords(mask, data.coords, label="mask").astype(bool) # Auto-mask based on null expressions or NaN RHS (use numpy for speed) if self.auto_mask: diff --git a/test/test_common.py b/test/test_common.py index 0c379a0b..a9e84bf3 100644 --- a/test/test_common.py +++ b/test/test_common.py @@ -18,6 +18,7 @@ from linopy import LinearExpression, Model, Variable from linopy.common import ( align, + align_to_coords, as_dataarray, assign_multiindex_safe, best_int, @@ -25,6 +26,7 @@ is_constant, iterate_slices, maybe_group_terms_polars, + validate_alignment, ) from linopy.testing import assert_linequal, assert_varequal from linopy.types import CoordsLike @@ -345,13 +347,15 @@ def test_as_dataarray_with_ndarray_coords_dict_dims_aligned() -> None: def test_as_dataarray_with_ndarray_coords_dict_set_dims_not_aligned() -> None: + """Coords is source of truth: extra coord entries broadcast into the result.""" target_dims = ("dim_0", "dim_1") target_coords = {"dim_0": ["a", "b"], "dim_2": ["A", "B"]} arr = np.array([[1, 2], [3, 4]]) da = as_dataarray(arr, coords=target_coords, dims=target_dims) - assert da.dims == target_dims + # dims labels the positional axes; coords adds dim_2 by broadcast. + assert set(da.dims) == {"dim_0", "dim_1", "dim_2"} assert list(da.coords["dim_0"].values) == ["a", "b"] - assert "dim_2" not in da.coords + assert list(da.coords["dim_2"].values) == ["A", "B"] def test_as_dataarray_with_number() -> None: @@ -483,6 +487,79 @@ def test_as_dataarray_with_unsupported_type() -> None: as_dataarray(lambda x: 1, dims=["dim1"], coords=[["a"]]) +def test_as_dataarray_preserves_extra_dims_for_broadcasting() -> None: + """Extra dims in the input are not rejected — they broadcast downstream.""" + arr = DataArray( + [[1, 2], [3, 4], [5, 6]], + dims=["a", "t"], + coords={"a": [0, 1, 2], "t": [10, 20]}, + ) + coords = {"a": [0, 1, 2]} + da = as_dataarray(arr, coords=coords) + assert set(da.dims) == {"a", "t"} + assert list(da.coords["t"].values) == [10, 20] + + +def test_as_dataarray_keeps_disjoint_shared_dim_values() -> None: + """Different value sets on a shared dim are passed through (xr.align handles).""" + arr = DataArray([1, 2, 3, 4, 5], dims=["a"], coords={"a": [0, 1, 2, 3, 4]}) + coords = {"a": [2, 3]} + da = as_dataarray(arr, coords=coords) + # No exception, no reindex; downstream alignment intersects. + assert list(da.coords["a"].values) == [0, 1, 2, 3, 4] + + +def test_validate_alignment_rejects_extra_dims() -> None: + arr = DataArray( + [[1, 2], [3, 4]], dims=["a", "b"], coords={"a": [0, 1], "b": [0, 1]} + ) + with pytest.raises(ValueError, match=r"not declared in coords"): + validate_alignment(arr, {"a": [0, 1]}) + + +def test_validate_alignment_rejects_value_mismatch() -> None: + arr = DataArray([1, 2, 3], dims=["a"], coords={"a": [0, 1, 2]}) + with pytest.raises(ValueError, match="do not match coords"): + validate_alignment(arr, {"a": [10, 20, 30]}) + + +def test_validate_alignment_allows_subset_dims() -> None: + """arr.dims ⊂ coords.dims is fine (broadcasting fills the missing dim).""" + arr = DataArray([1, 2, 3], dims=["a"], coords={"a": [0, 1, 2]}) + validate_alignment(arr, {"a": [0, 1, 2], "b": [10, 20]}) # no raise + + +def test_validate_alignment_unnamed_coords_and_dims() -> None: + """coords=[[...]], dims=[...] enforces the same contract as a named mapping.""" + arr = DataArray([1, 2, 3], dims=["x"], coords={"x": [0, 1, 2]}) + validate_alignment(arr, [[0, 1, 2]], dims=["x"]) # no raise + + bad = DataArray( + [[1, 2], [3, 4]], dims=["x", "y"], coords={"x": [0, 1], "y": [0, 1]} + ) + with pytest.raises(ValueError, match=r"not declared in coords"): + validate_alignment(bad, [[0, 1]], dims=["x"]) + + +def test_validate_alignment_label_in_error() -> None: + arr = DataArray( + [[1, 2], [3, 4]], dims=["a", "b"], coords={"a": [0, 1], "b": [0, 1]} + ) + with pytest.raises(ValueError, match=r"lower bound has dimension\(s\) \['b'\]"): + validate_alignment(arr, {"a": [0, 1]}, label="lower bound") + + +def test_align_to_coords_wraps_conversion_errors() -> None: + with pytest.raises(ValueError, match=r"lower bound could not be aligned"): + align_to_coords(np.array([1, 2]), {"x": [0, 1, 2]}, label="lower bound") + + +def test_align_to_coords_preserves_type_errors() -> None: + """Unsupported input types stay TypeError (don't become ValueError).""" + with pytest.raises(TypeError, match=r"lower bound could not be aligned"): + align_to_coords(lambda x: x, {"x": [0, 1, 2]}, label="lower bound") + + def test_best_int() -> None: # Test for int8 assert best_int(127) == np.int8 diff --git a/test/test_constraints.py b/test/test_constraints.py index e532e5ce..acc41b2e 100644 --- a/test/test_constraints.py +++ b/test/test_constraints.py @@ -273,12 +273,14 @@ def test_masked_constraints_broadcast() -> None: dims=["dim_0"], coords={"dim_0": range(5)}, ) - with pytest.raises(ValueError, match="Coordinates for dimension 'dim_0'"): + with pytest.raises( + ValueError, match=r"mask: coordinate values for dimension 'dim_0'" + ): m.add_constraints(1 * x + 10 * y, EQUAL, 0, name="bc3", mask=mask3) # Mask with extra dimension not in data should raise mask4 = xr.DataArray([True, False], dims=["extra_dim"]) - with pytest.raises(ValueError, match="extra dimensions"): + with pytest.raises(ValueError, match=r"mask has dimension\(s\) \['extra_dim'\]"): m.add_constraints(1 * x + 10 * y, EQUAL, 0, name="bc4", mask=mask4) diff --git a/test/test_variable.py b/test/test_variable.py index 6a123ff0..47c25986 100644 --- a/test/test_variable.py +++ b/test/test_variable.py @@ -419,19 +419,36 @@ def test_bound_types_with_coords( ) def test_dataarray_coord_mismatch(self, model: "Model", coords: Any) -> None: lower = DataArray([0, 0, 0], dims=["x"], coords={"x": [0, 1, 2]}) - with pytest.raises(ValueError, match="do not match"): + with pytest.raises(ValueError, match="lower bound.*do not match coords"): model.add_variables(lower=lower, coords=coords, name="x") def test_dataarray_coord_mismatch_upper(self, model: "Model") -> None: upper = DataArray([1, 2, 3], dims=["x"], coords={"x": [10, 20, 30]}) - with pytest.raises(ValueError, match="do not match"): + with pytest.raises(ValueError, match="upper bound.*do not match coords"): model.add_variables(upper=upper, coords=self.SEQ_COORDS, name="x") def test_dataarray_extra_dims(self, model: "Model") -> None: - lower = DataArray([[1, 2], [3, 4]], dims=["x", "y"]) - with pytest.raises(ValueError, match="extra dimensions"): + lower = DataArray( + [[1, 2], [3, 4], [5, 6]], dims=["x", "y"], coords={"x": [0, 1, 2]} + ) + with pytest.raises(ValueError, match=r"lower bound has dimension\(s\) \['y'\]"): model.add_variables(lower=lower, coords=self.DICT_COORDS, name="x") + def test_mask_extra_dims_with_unnamed_coords_and_dims(self, model: "Model") -> None: + """Mask is validated against coords + dims= like lower/upper.""" + mask = DataArray( + [[True, False], [True, False], [False, True]], + dims=["x", "extra"], + coords={"x": [0, 1, 2]}, + ) + with pytest.raises(ValueError, match=r"mask has dimension\(s\) \['extra'\]"): + model.add_variables( + mask=mask, + coords=[[0, 1, 2]], + dims=["x"], + name="m", + ) + def test_dataarray_coord_reorder(self, model: "Model") -> None: """A bound whose coords differ only in order is reindexed to coords.""" lower = DataArray([3, 1, 2], dims=["x"], coords={"x": ["c", "a", "b"]}) @@ -477,9 +494,9 @@ def test_positional_bound_wrong_size_raises_clear_error( not a 'coordinates do not match' error. """ coords = [pd.Index(list("abc"), name="x")] - with pytest.raises(Exception, match="conflicting sizes|do not match"): + with pytest.raises(ValueError, match=r"upper bound could not be aligned"): model.add_variables(upper=np.array([1, 2]), coords=coords, name="np_bad") - with pytest.raises(Exception, match="conflicting sizes|do not match"): + with pytest.raises(ValueError, match=r"upper bound could not be aligned"): model.add_variables(upper=pd.Series([1, 2]), coords=coords, name="s_bad") def test_unnamed_coords_short_circuit(self, model: "Model") -> None: @@ -658,7 +675,7 @@ def test_one_dataarray_mismatches_other_ok(self, model: "Model") -> None: """Only the mismatched bound should raise, regardless of the other.""" lower = DataArray([0, 0, 0], dims=["x"], coords={"x": [0, 1, 2]}) upper = DataArray([1, 1], dims=["x"], coords={"x": [10, 20]}) - with pytest.raises(ValueError, match="do not match"): + with pytest.raises(ValueError, match=r"upper bound.*do not match coords"): model.add_variables( lower=lower, upper=upper, coords=self.SEQ_COORDS, name="x" ) @@ -760,7 +777,7 @@ def test_reordered_coords_reindexed(self, model: "Model") -> None: def test_reordered_coords_different_values_raises(self, model: "Model") -> None: """Overlapping but not identical coord sets must still raise.""" lower = DataArray([10, 20], dims=["x"], coords={"x": ["a", "b"]}) - with pytest.raises(ValueError, match="do not match"): + with pytest.raises(ValueError, match=r"lower bound.*do not match coords"): model.add_variables(lower=lower, coords={"x": ["a", "c"]}, name="x") # -- String and datetime coordinates ----------------------------------- @@ -788,7 +805,7 @@ def test_string_coords_mismatch(self, model: "Model") -> None: lower = DataArray( [0, 0], dims=["region"], coords={"region": ["north", "south"]} ) - with pytest.raises(ValueError, match="do not match"): + with pytest.raises(ValueError, match=r"lower bound.*do not match coords"): model.add_variables( lower=lower, coords={"region": ["north", "south", "east"]}, diff --git a/test/test_variables.py b/test/test_variables.py index c16c27ea..e55ca680 100644 --- a/test/test_variables.py +++ b/test/test_variables.py @@ -138,12 +138,14 @@ def test_variables_mask_broadcast() -> None: dims=["dim_0"], coords={"dim_0": range(5)}, ) - with pytest.raises(ValueError, match="Coordinates for dimension 'dim_0'"): + with pytest.raises( + ValueError, match=r"mask: coordinate values for dimension 'dim_0'" + ): m.add_variables(lower, upper, name="z", mask=mask3) # Mask with extra dimension not in data should raise mask4 = xr.DataArray([True, False], dims=["extra_dim"]) - with pytest.raises(ValueError, match="extra dimensions"): + with pytest.raises(ValueError, match=r"mask has dimension\(s\) \['extra_dim'\]"): m.add_variables(lower, upper, name="w", mask=mask4)