diff --git a/pytensor/xtensor/__init__.py b/pytensor/xtensor/__init__.py
index a72bf66c79..06265e40de 100644
--- a/pytensor/xtensor/__init__.py
+++ b/pytensor/xtensor/__init__.py
@@ -7,7 +7,6 @@
 )
 from pytensor.xtensor.shape import concat
 from pytensor.xtensor.type import (
-    XTensorType,
     as_xtensor,
     xtensor,
     xtensor_constant,
diff --git a/pytensor/xtensor/indexing.py b/pytensor/xtensor/indexing.py
new file mode 100644
index 0000000000..91e74017c9
--- /dev/null
+++ b/pytensor/xtensor/indexing.py
@@ -0,0 +1,186 @@
+# HERE LIE DRAGONS
+# Useful links to make sense of all the numpy/xarray complexity
+# https://numpy.org/devdocs//user/basics.indexing.html
+# https://numpy.org/neps/nep-0021-advanced-indexing.html
+# https://docs.xarray.dev/en/latest/user-guide/indexing.html
+# https://tutorial.xarray.dev/intermediate/indexing/advanced-indexing.html
+
+from pytensor.graph.basic import Apply, Constant, Variable
+from pytensor.scalar.basic import discrete_dtypes
+from pytensor.tensor.basic import as_tensor
+from pytensor.tensor.type_other import NoneTypeT, SliceType, make_slice
+from pytensor.xtensor.basic import XOp, xtensor_from_tensor
+from pytensor.xtensor.type import XTensorType, as_xtensor, xtensor
+
+
+def as_idx_variable(idx, indexed_dim: str):
+    if idx is None or (isinstance(idx, Variable) and isinstance(idx.type, NoneTypeT)):
+        raise TypeError(
+            "XTensors do not support indexing with None (np.newaxis), use expand_dims instead"
+        )
+    if isinstance(idx, slice):
+        idx = make_slice(idx)
+    elif isinstance(idx, Variable) and isinstance(idx.type, SliceType):
+        pass
+    elif (
+        isinstance(idx, tuple)
+        and len(idx) == 2
+        and (
+            isinstance(idx[0], str)
+            or (
+                isinstance(idx[0], tuple | list)
+                and all(isinstance(d, str) for d in idx[0])
+            )
+        )
+    ):
+        # Special case for ("x", array) that xarray supports
+        dim, idx = idx
+        if isinstance(idx, Variable) and isinstance(idx.type, XTensorType):
+            raise IndexError(
+                f"Giving a dimension name to an XTensorVariable indexer is not supported: {(dim, idx)}. "
+                "Use .rename() instead."
+            )
+        if isinstance(dim, str):
+            dims = (dim,)
+        else:
+            dims = tuple(dim)
+        idx = as_xtensor(as_tensor(idx), dims=dims)
+    else:
+        # Must be integer / boolean indices; we already accounted for None and slices
+        try:
+            idx = as_xtensor(idx)
+        except TypeError:
+            idx = as_tensor(idx)
+            if idx.type.ndim > 1:
+                # Same error that xarray raises
+                raise IndexError(
+                    "Unlabeled multi-dimensional array cannot be used for indexing"
+                )
+            # This is implicitly an XTensorVariable with dim matching the indexed one
+            idx = xtensor_from_tensor(idx, dims=(indexed_dim,)[: idx.type.ndim])
+
+    if idx.type.dtype == "bool":
+        if idx.type.ndim != 1:
+            # xarray allows `x[True]`, but I think it is a bug: https://github.com/pydata/xarray/issues/10379
+            # Otherwise, it is always restricted to 1d boolean indexing arrays
+            raise NotImplementedError(
+                "Only 1d boolean indexing arrays are supported"
+            )
+        if idx.type.dims != (indexed_dim,):
+            raise IndexError(
+                "Boolean indexer should be unlabeled or on the same dimension as the indexed array. "
+                f"Indexer is on {idx.type.dims} but the target dimension is {indexed_dim}."
+            )
+
+        # Convert to nonzero indices
+        idx = as_xtensor(idx.values.nonzero()[0], dims=idx.type.dims)
+
+    elif idx.type.dtype not in discrete_dtypes:
+        raise TypeError("Numerical indices must be integers or boolean")
+
+    return idx
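+
+
+# For orientation, a sketch (hypothetical x with dims ("a", "b")) of the index
+# forms the function above normalizes:
+#   x[0]             -> scalar integer index on dim "a"
+#   x[1:3]           -> SliceType index on "a"
+#   x[[0, 1]]        -> implicitly labeled with the indexed dim "a" (vectorized)
+#   x[("c", [0, 1])] -> indexer labeled with the new dim "c" (orthogonal to "b")
+#   x[bool_mask]     -> 1d boolean mask on "a", converted to nonzero() positions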
+
+
+def get_static_slice_length(slc: Variable, dim_length: None | int) -> int | None:
+    if dim_length is None:
+        return None
+    if isinstance(slc, Constant):
+        d = slc.data
+        start, stop, step = d.start, d.stop, d.step
+    elif slc.owner is None:
+        # It's a root variable; no way of knowing what we're getting
+        return None
+    else:
+        # It's a MakeSliceOp
+        start, stop, step = slc.owner.inputs
+        if isinstance(start, Constant):
+            start = start.data
+        else:
+            return None
+        if isinstance(stop, Constant):
+            stop = stop.data
+        else:
+            return None
+        if isinstance(step, Constant):
+            step = step.data
+        else:
+            return None
+    return len(range(*slice(start, stop, step).indices(dim_length)))
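+
+
+# E.g. (a worked case, not exercised here): for the constant slice 1:None:2 over
+# a dimension of static length 7, this returns
+# len(range(*slice(1, None, 2).indices(7))) == len([1, 3, 5]) == 3,
+# matching the runtime length of x[1::2].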
+
+
+class Index(XOp):
+    __props__ = ()
+
+    def make_node(self, x, *idxs):
+        x = as_xtensor(x)
+
+        if any(idx is Ellipsis for idx in idxs):
+            if idxs.count(Ellipsis) > 1:
+                raise IndexError("an index can only have a single ellipsis ('...')")
+            # Convert intermediate Ellipsis to slice(None)
+            ellipsis_loc = idxs.index(Ellipsis)
+            n_implied_none_slices = x.type.ndim - (len(idxs) - 1)
+            idxs = (
+                *idxs[:ellipsis_loc],
+                *((slice(None),) * n_implied_none_slices),
+                *idxs[ellipsis_loc + 1 :],
+            )
+
+        x_ndim = x.type.ndim
+        x_dims = x.type.dims
+        x_shape = x.type.shape
+        out_dims = []
+        out_shape = []
+
+        def combine_dim_info(idx_dim, idx_dim_shape):
+            if idx_dim not in out_dims:
+                # First information about the dimension length
+                out_dims.append(idx_dim)
+                out_shape.append(idx_dim_shape)
+            else:
+                # Dim already introduced in output by a previous index
+                # Update static shape or raise if incompatible
+                out_dim_pos = out_dims.index(idx_dim)
+                out_dim_shape = out_shape[out_dim_pos]
+                if out_dim_shape is None:
+                    # We don't know the size of the dimension yet
+                    out_shape[out_dim_pos] = idx_dim_shape
+                elif idx_dim_shape is not None and idx_dim_shape != out_dim_shape:
+                    raise IndexError(
+                        f"Dimension of indexers mismatch for dim {idx_dim}"
+                    )
+
+        if len(idxs) > x_ndim:
+            raise IndexError("Too many indices")
+
+        idxs = [
+            as_idx_variable(idx, dim) for idx, dim in zip(idxs, x_dims, strict=False)
+        ]
+
+        for i, idx in enumerate(idxs):
+            if isinstance(idx.type, SliceType):
+                idx_dim = x_dims[i]
+                idx_dim_shape = get_static_slice_length(idx, x_shape[i])
+                combine_dim_info(idx_dim, idx_dim_shape)
+            else:
+                if idx.type.ndim == 0:
+                    # Scalar index, dimension is dropped
+                    continue
+
+                assert isinstance(idx.type, XTensorType)
+
+                idx_dims = idx.type.dims
+                for idx_dim in idx_dims:
+                    idx_dim_shape = idx.type.shape[idx_dims.index(idx_dim)]
+                    combine_dim_info(idx_dim, idx_dim_shape)
+
+        for dim_i, shape_i in zip(x_dims[i + 1 :], x_shape[i + 1 :]):
+            # Add back any unindexed dimensions
+            if dim_i not in out_dims:
+                # If the dimension was not indexed, we keep it as is
+                combine_dim_info(dim_i, shape_i)
+
+        output = xtensor(dtype=x.type.dtype, shape=out_shape, dims=out_dims)
+        return Apply(self, [x, *idxs], [output])
+
+
+index = Index()
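+
+# A small usage sketch (hypothetical shapes) of the static type inference above:
+#   x = xtensor("x", dims=("a", "b"), shape=(5, 7))
+#   x[1:4].type.dims == ("a", "b");  x[1:4].type.shape == (3, 7)
+#   i = xtensor("i", dtype="int64", shape=(2,), dims=("c",))
+#   x[i].type.dims == ("c", "b")  # orthogonal: "a" is replaced by the indexer dim "c"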
diff --git a/pytensor/xtensor/math.py b/pytensor/xtensor/math.py
index 73ee5c8c7b..0e9d66f232 100644
--- a/pytensor/xtensor/math.py
+++ b/pytensor/xtensor/math.py
@@ -1,8 +1,13 @@
 import inspect
 import sys
 
+import numpy as np
+
 import pytensor.scalar as ps
+from pytensor import config
 from pytensor.scalar import ScalarOp
+from pytensor.scalar.basic import _cast_mapping
+from pytensor.xtensor.basic import as_xtensor
 from pytensor.xtensor.vectorization import XElemwise
 
@@ -29,3 +34,26 @@ def get_all_scalar_ops():
 
 for name, op in get_all_scalar_ops().items():
     setattr(this_module, name, op)
+
+
+_xelemwise_cast_op: dict[str, XElemwise] = {}
+
+
+def cast(x, dtype):
+    if dtype == "floatX":
+        dtype = config.floatX
+    else:
+        dtype = np.dtype(dtype).name
+
+    x = as_xtensor(x)
+    if x.type.dtype == dtype:
+        return x
+    if x.type.dtype.startswith("complex") and not dtype.startswith("complex"):
+        raise TypeError(
+            "Casting from complex to real is ambiguous: consider"
+            " real(), imag(), angle() or abs()"
+        )
+
+    if dtype not in _xelemwise_cast_op:
+        _xelemwise_cast_op[dtype] = XElemwise(scalar_op=_cast_mapping[dtype])
+    return _xelemwise_cast_op[dtype](x)
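+
+
+# Usage sketch (hypothetical x): for a float32 xtensor, cast(x, "int16") builds
+# an XElemwise around the scalar cast op from _cast_mapping and memoizes it in
+# _xelemwise_cast_op, so later casts to "int16" reuse the same Op instance;
+# cast(x, "floatX") first resolves to the active config.floatX.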
diff --git a/pytensor/xtensor/rewriting/__init__.py b/pytensor/xtensor/rewriting/__init__.py
index 7ce55b9256..a65ad0db85 100644
--- a/pytensor/xtensor/rewriting/__init__.py
+++ b/pytensor/xtensor/rewriting/__init__.py
@@ -1,4 +1,5 @@
 import pytensor.xtensor.rewriting.basic
+import pytensor.xtensor.rewriting.indexing
 import pytensor.xtensor.rewriting.reduction
 import pytensor.xtensor.rewriting.shape
 import pytensor.xtensor.rewriting.vectorization
diff --git a/pytensor/xtensor/rewriting/indexing.py b/pytensor/xtensor/rewriting/indexing.py
new file mode 100644
index 0000000000..70f232ffb1
--- /dev/null
+++ b/pytensor/xtensor/rewriting/indexing.py
@@ -0,0 +1,150 @@
+from itertools import zip_longest
+
+from pytensor import as_symbolic
+from pytensor.graph import Constant, node_rewriter
+from pytensor.tensor import TensorType, arange, specify_shape
+from pytensor.tensor.subtensor import _non_consecutive_adv_indexing
+from pytensor.tensor.type_other import NoneTypeT, SliceType
+from pytensor.xtensor.basic import tensor_from_xtensor, xtensor_from_tensor
+from pytensor.xtensor.indexing import Index
+from pytensor.xtensor.rewriting.utils import register_xcanonicalize
+from pytensor.xtensor.type import XTensorType
+
+
+def to_basic_idx(idx):
+    if isinstance(idx.type, SliceType):
+        if isinstance(idx, Constant):
+            return idx.data
+        elif idx.owner:
+            # MakeSlice Op
+            # We transform NoneConsts to regular None so that basic Subtensor can be used if possible
+            return slice(
+                *[
+                    None if isinstance(i.type, NoneTypeT) else i
+                    for i in idx.owner.inputs
+                ]
+            )
+        else:
+            return idx
+    if (
+        isinstance(idx.type, XTensorType)
+        and idx.type.ndim == 0
+        and idx.type.dtype != "bool"
+    ):
+        return idx.values
+    raise TypeError("Cannot convert idx to basic idx")
+
+
+@register_xcanonicalize
+@node_rewriter(tracks=[Index])
+def lower_index(fgraph, node):
+    """Lower XTensorVariable indexing to regular TensorVariable indexing.
+
+    xarray-like indexing has two modes:
+    1. Orthogonal indexing: Indices of different output labeled dimensions are combined to produce all combinations of indices.
+    2. Vectorized indexing: Indices of the same output labeled dimension are combined point-wise like in regular numpy advanced indexing.
+
+    An Index Op can combine both modes.
+    To achieve orthogonal indexing using numpy semantics we must use multidimensional advanced indexing.
+    We expand the dims of each index so they are as large as the number of output dimensions, place the indices that
+    belong to the same output dimension in the same axis, and those that belong to different output dimensions in different axes.
+
+    For instance, to do an outer 2x2 indexing we can select x[arange(x.shape[0])[:, None], arange(x.shape[1])[None, :]].
+    This is a generalization of `np.ix_` that allows combining some dimensions, and not others, as well as
+    indices that have more than one dimension at the start.
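+
+    As a rough numpy sketch of the two modes (illustrative names only):
+
+        x = np.empty((5, 7))
+        a_idx, b_idx = np.array([0, 1]), np.array([2, 3, 4])
+        x[a_idx[:, None], b_idx[None, :]]  # orthogonal: all pairs, shape (2, 3)
+        x[a_idx, b_idx[:2]]                # vectorized: pairs (0, 2), (1, 3), shape (2,)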
+
+    In addition, xarray basic indices (slices) can be vectorized with other advanced indices (if they act on the same output dimension).
+    However, in numpy, basic indices are always orthogonal to advanced indices. To make them behave like vectorized indices
+    we have to convert the slices to equivalent advanced indices.
+    We do this by creating an `arange` tensor that matches the shape of the dimension being indexed,
+    and then indexing it with the original slice. This index is then handled as a regular advanced index.
+
+    Note: The Index Op has only two types of indices: Slices and XTensorVariables. Regular array indices
+    are converted to the appropriate XTensorVariable by `Index.make_node`.
+    """
+
+    x, *idxs = node.inputs
+    [out] = node.outputs
+    x_tensor = tensor_from_xtensor(x)
+
+    if all(
+        (
+            isinstance(idx.type, SliceType)
+            or (isinstance(idx.type, XTensorType) and idx.type.ndim == 0)
+        )
+        for idx in idxs
+    ):
+        # Special case having just basic indexing
+        x_tensor_indexed = x_tensor[tuple(to_basic_idx(idx) for idx in idxs)]
+
+    else:
+        # General case, we have to align the indices positionally to achieve vectorized or orthogonal indexing
+        # May need to convert basic indexing to advanced indexing if it acts on a dimension
+        # that is also indexed by an advanced index
+        x_dims = x.type.dims
+        x_shape = tuple(x.shape)
+        out_ndim = out.type.ndim
+        out_dims = out.type.dims
+        aligned_idxs = []
+        basic_idx_axis = []
+        # zip_longest adds the implicit slice(None)
+        for i, (idx, x_dim) in enumerate(
+            zip_longest(idxs, x_dims, fillvalue=as_symbolic(slice(None)))
+        ):
+            if isinstance(idx.type, SliceType):
+                if not any(
+                    (
+                        isinstance(other_idx.type, XTensorType)
+                        and x_dim in other_idx.dims
+                    )
+                    for j, other_idx in enumerate(idxs)
+                    if j != i
+                ):
+                    # We can use basic indexing directly if no other index acts on this dimension
+                    # This is an optimization that avoids creating an unnecessary arange tensor
+                    # and facilitates the use of the specialized AdvancedSubtensor1 when possible
+                    aligned_idxs.append(idx)
+                    basic_idx_axis.append(out_dims.index(x_dim))
+                else:
+                    # Otherwise we need to convert the basic index into an equivalent advanced indexing
+                    # And align it so it interacts correctly with the other advanced indices
+                    adv_idx_equivalent = arange(x_shape[i])[to_basic_idx(idx)]
+                    ds_order = ["x"] * out_ndim
+                    ds_order[out_dims.index(x_dim)] = 0
+                    aligned_idxs.append(adv_idx_equivalent.dimshuffle(ds_order))
+            else:
+                assert isinstance(idx.type, XTensorType)
+                if idx.type.ndim == 0:
+                    # Scalar index, we can use it directly
+                    aligned_idxs.append(idx.values)
+                else:
+                    # Vector index, we need to align the indexing dimensions with the base_dims
+                    ds_order = ["x"] * out_ndim
+                    for j, idx_dim in enumerate(idx.dims):
+                        ds_order[out_dims.index(idx_dim)] = j
+                    aligned_idxs.append(idx.values.dimshuffle(ds_order))
+
+        # Squeeze indexing dimensions that were not used because we kept basic indexing slices
+        if basic_idx_axis:
+            aligned_idxs = [
+                idx.squeeze(axis=basic_idx_axis)
+                if (isinstance(idx.type, TensorType) and idx.type.ndim > 0)
+                else idx
+                for idx in aligned_idxs
+            ]
+
+        x_tensor_indexed = x_tensor[tuple(aligned_idxs)]
+
+        if basic_idx_axis and _non_consecutive_adv_indexing(aligned_idxs):
+            # Numpy moves advanced indexing dimensions to the front when they are not consecutive
+            # We need to transpose them back to the expected output order
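+            # E.g. np.empty((5, 6, 7))[[0, 1], :, [2, 3]].shape == (2, 6):
+            # the broadcast advanced-index dims land in front of the kept slice dim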
+            x_tensor_indexed_basic_dims = [out_dims[axis] for axis in basic_idx_axis]
+            x_tensor_indexed_dims = [
+                dim for dim in out_dims if dim not in x_tensor_indexed_basic_dims
+            ] + x_tensor_indexed_basic_dims
+            transpose_order = [x_tensor_indexed_dims.index(dim) for dim in out_dims]
+            x_tensor_indexed = x_tensor_indexed.transpose(transpose_order)
+
+    # Add lost shape information
+    x_tensor_indexed = specify_shape(x_tensor_indexed, out.type.shape)
+    new_out = xtensor_from_tensor(x_tensor_indexed, dims=out.type.dims)
+    return [new_out]
diff --git a/pytensor/xtensor/type.py b/pytensor/xtensor/type.py
index eda02febe0..adb7218147 100644
--- a/pytensor/xtensor/type.py
+++ b/pytensor/xtensor/type.py
@@ -1,3 +1,5 @@
+import warnings
+
 from pytensor.tensor import TensorType
 from pytensor.tensor.math import variadic_mul
 
@@ -10,7 +12,7 @@
     XARRAY_AVAILABLE = False
 
 from collections.abc import Sequence
-from typing import Literal, TypeVar
+from typing import Any, Literal, TypeVar
 
 import numpy as np
 
@@ -49,6 +51,10 @@ def __init__(
             self.shape = (None,) * len(self.dims)
         else:
             self.shape = tuple(shape)
+            if len(self.shape) != len(self.dims):
+                raise ValueError(
+                    f"Shape {self.shape} must have the same length as dims {self.dims}"
+                )
         self.ndim = len(self.dims)
         self.name = name
 
@@ -347,7 +353,102 @@ def sel(self, *args, **kwargs):
         raise NotImplementedError("sel not implemented for XTensorVariable")
 
     def __getitem__(self, idx):
-        raise NotImplementedError("Indexing not yet implemnented")
+        if isinstance(idx, dict):
+            return self.isel(idx)
+
+        if not isinstance(idx, tuple):
+            idx = (idx,)
+
+        return px.indexing.index(self, *idx)
+
+    def isel(
+        self,
+        indexers: dict[str, Any] | None = None,
+        drop: bool = False,  # Unused by PyTensor
+        missing_dims: Literal["raise", "warn", "ignore"] = "raise",
+        **indexers_kwargs,
+    ):
+        if indexers_kwargs:
+            if indexers is not None:
+                raise ValueError(
+                    "Cannot pass both indexers and indexers_kwargs to isel"
+                )
+            indexers = indexers_kwargs
+
+        if missing_dims not in {"raise", "warn", "ignore"}:
+            raise ValueError(
+                f"Unrecognized options {missing_dims} for missing_dims argument"
+            )
+
+        # Sort indices and pass them to index
+        dims = self.type.dims
+        indices = [slice(None)] * self.type.ndim
+        for key, idx in indexers.items():
+            if idx is Ellipsis:
+                # Xarray raises a less informative error, suggesting indices must be integer
+                # But slices are also fine
+                raise TypeError("Ellipsis (...) is an invalid labeled index")
+            try:
+                indices[dims.index(key)] = idx
+            except ValueError:
+                # tuple.index raises ValueError when the dimension is missing
+                if missing_dims == "raise":
+                    raise ValueError(
+                        f"Dimension {key} does not exist. Expected one of {dims}"
+                    )
+                elif missing_dims == "warn":
+                    warnings.warn(
+                        f"Dimension {key} does not exist. Expected one of {dims}",
+                        UserWarning,
+                    )
+
+        return px.indexing.index(self, *indices)
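+
+    # Usage sketch (hypothetical x with dims ("a", "b")):
+    #   x.isel(a=0, b=slice(1, None)) and x.isel({"a": 0, "b": slice(1, None)})
+    #   both lower to x[0, 1:], with indices placed by dimension name.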
+
+    def _head_tail_or_thin(
+        self,
+        indexers: dict[str, Any] | int | None,
+        indexers_kwargs: dict[str, Any],
+        *,
+        kind: Literal["head", "tail", "thin"],
+    ):
+        if indexers_kwargs:
+            if indexers is not None:
+                raise ValueError(
+                    f"Cannot pass both indexers and indexers_kwargs to {kind}"
+                )
+            indexers = indexers_kwargs
+
+        if indexers is None:
+            if kind == "thin":
+                raise TypeError(
+                    "thin() indexers must be either dict-like or a single integer"
+                )
+            else:
+                # Default to 5 for head and tail
+                indexers = {dim: 5 for dim in self.type.dims}
+
+        elif not isinstance(indexers, dict):
+            indexers = {dim: indexers for dim in self.type.dims}
+
+        if kind == "head":
+            indices = {dim: slice(None, value) for dim, value in indexers.items()}
+        elif kind == "tail":
+            sizes = self.sizes
+            # Can't use slice(-value, None), in case value is zero
+            indices = {
+                dim: slice(sizes[dim] - value, None) for dim, value in indexers.items()
+            }
+        elif kind == "thin":
+            indices = {dim: slice(None, None, value) for dim, value in indexers.items()}
+        return self.isel(indices)
+
+    def head(self, indexers: dict[str, Any] | int | None = None, **indexers_kwargs):
+        return self._head_tail_or_thin(indexers, indexers_kwargs, kind="head")
+
+    def tail(self, indexers: dict[str, Any] | int | None = None, **indexers_kwargs):
+        return self._head_tail_or_thin(indexers, indexers_kwargs, kind="tail")
+
+    def thin(self, indexers: dict[str, Any] | int | None = None, **indexers_kwargs):
+        return self._head_tail_or_thin(indexers, indexers_kwargs, kind="thin")
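+
+    # For intuition (n defaults to 5, mirroring xarray):
+    #   x.head(3) -> x.isel({dim: slice(None, 3) for dim in x.type.dims})
+    #   x.tail(3) -> slice(size - 3, None) per dim, using self.sizes
+    #   x.thin(3) -> x.isel({dim: slice(None, None, 3) for dim in x.type.dims})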
" + "Indexer is on ('c',) but the target dimension is a." + ), + ): + x[idx1.astype("bool")] + + +class TestVectorizedIndexingNotAllowedToBroadcast: + def test_compile_time_error(self): + x = xtensor(dims=("a", "b"), shape=(3, 5)) + idx_a = xtensor("idx_a", dtype=int, shape=(4,), dims=("b",)) + idx_b = xtensor("idx_b", dtype=int, shape=(1,), dims=("b",)) + with pytest.raises( + IndexError, match="Dimension of indexers mismatch for dim b" + ): + x[idx_a, idx_b] + + @pytest.mark.xfail( + reason="Check that lowered indexing is not allowed to broadcast not implemented yet" + ) + def test_runtime_error(self): + """ + Test that, unlike in numpy, indices with different shapes cannot act on the same dimension, + even if the shapes could broadcast as per numpy semantics. + """ + x = xtensor(dims=("a", "b"), shape=(3, 5)) + idx_a = xtensor("idx_a", dtype=int, shape=(None,), dims=("b",)) + idx_b = xtensor("idx_b", dtype=int, shape=(None,), dims=("b",)) + out = x[idx_a, idx_b] + + fn = xr_function([x, idx_a, idx_b], out) + + x_test = xr_arange_like(x) + valid_idx_a_test = DataArray(np.array([0], dtype=int), dims=("b",)) + idx_b_test = DataArray(np.array([1], dtype=int), dims=("b",)) + xr_assert_allclose( + fn(x_test, valid_idx_a_test, idx_b_test), + x_test[valid_idx_a_test, idx_b_test], + ) + + invalid_idx_a_test = DataArray(np.array([0, 1, 0, 1], dtype=int), dims=("b",)) + with pytest.raises(ValueError): + fn(x_test, invalid_idx_a_test, idx_b_test) + + +@pytest.mark.parametrize( + "dims_order", + [ + ("a", "b", "c", "d"), + ("d", "c", "b", "a"), + ("c", "a", "b", "d"), + ], +) +def test_scalar_integer_indexing(dims_order): + x = xtensor(dims=dims_order, shape=(3, 5, 7, 11)) + scalar_idx = xtensor("scalar_idx", dtype=int, shape=(), dims=()) + vec_idx1 = xtensor("vec_idx", dtype=int, shape=(4,), dims=("a",)) + vec_idx2 = xtensor("vec_idx2", dtype=int, shape=(4,), dims=("c",)) + + idxs = [None] * 4 + idxs[x.type.dims.index("a")] = scalar_idx + idxs[x.type.dims.index("b")] = vec_idx1 + idxs[x.type.dims.index("c")] = vec_idx2 + idxs[x.type.dims.index("d")] = -scalar_idx + out1 = x[tuple(idxs)] + + idxs[x.type.dims.index("a")] = vec_idx1.rename(a="c") + out2 = x[tuple(idxs)] + + fn = xr_function([x, scalar_idx, vec_idx1, vec_idx2], (out1, out2)) + + x_test = xr_arange_like(x) + scalar_idx_test = DataArray(np.array(1, dtype=int), dims=()) + vec_idx_test1 = DataArray(np.array([0, 1, 0, 2], dtype=int), dims=("a",)) + vec_idx_test2 = DataArray(np.array([0, 2, 2, 1], dtype=int), dims=("c",)) + res1, res2 = fn(x_test, scalar_idx_test, vec_idx_test1, vec_idx_test2) + idxs = [None] * 4 + idxs[x.type.dims.index("a")] = scalar_idx_test + idxs[x.type.dims.index("b")] = vec_idx_test1 + idxs[x.type.dims.index("c")] = vec_idx_test2 + idxs[x.type.dims.index("d")] = -scalar_idx_test + expected_res1 = x_test[tuple(idxs)] + idxs[x.type.dims.index("a")] = vec_idx_test1.rename(a="c") + expected_res2 = x_test[tuple(idxs)] + xr_assert_allclose(res1, expected_res1) + xr_assert_allclose(res2, expected_res2) + + +def test_unsupported_boolean_indexing(): + x = xtensor(dims=("a", "b"), shape=(3, 5)) + + mat_idx = xtensor("idx", dtype=bool, shape=(4, 2), dims=("a", "b")) + scalar_idx = mat_idx.isel(a=0, b=1) + + for idx in (mat_idx, scalar_idx, scalar_idx.values): + with pytest.raises( + NotImplementedError, + match="Only 1d boolean indexing arrays are supported", + ): + x[idx] + + +def test_boolean_indexing(): + x = xtensor("x", shape=(8, 7), dims=("a", "b")) + bool_idx = xtensor("bool_idx", dtype=bool, shape=(8,), dims=("a",)) + 
+    int_idx = xtensor("int_idx", dtype=int, shape=(4, 3), dims=("a", "new_dim"))
+
+    out_vectorized = x[bool_idx, int_idx]
+    out_orthogonal = x[bool_idx, int_idx.rename(a="b")]
+    fn = xr_function([x, bool_idx, int_idx], [out_vectorized, out_orthogonal])
+
+    x_test = xr_arange_like(x)
+    bool_idx_test = DataArray(np.array([True, False] * 4, dtype=bool), dims=("a",))
+    int_idx_test = DataArray(
+        np.random.binomial(n=4, p=0.5, size=(4, 3)),
+        dims=("a", "new_dim"),
+    )
+    res1, res2 = fn(x_test, bool_idx_test, int_idx_test)
+    expected_res1 = x_test[bool_idx_test, int_idx_test]
+    expected_res2 = x_test[bool_idx_test, int_idx_test.rename(a="b")]
+    xr_assert_allclose(res1, expected_res1)
+    xr_assert_allclose(res2, expected_res2)
diff --git a/tests/xtensor/test_math.py b/tests/xtensor/test_math.py
index 4fecb9665c..0631df935b 100644
--- a/tests/xtensor/test_math.py
+++ b/tests/xtensor/test_math.py
@@ -1,5 +1,6 @@
 # ruff: noqa: E402
 import pytest
+from tests.xtensor.util import xr_arange_like
 
 pytest.importorskip("xarray")
 
 #
@@ -14,6 +15,18 @@ from tests.xtensor.util import xr_assert_allclose, xr_function
 
 
+def test_scalar_case():
+    x = xtensor("x", dims=(), shape=())
+    y = xtensor("y", dims=(), shape=())
+    out = add(x, y)
+
+    fn = function([x, y], out)
+
+    x_test = DataArray(2.0, dims=())
+    y_test = DataArray(3.0, dims=())
+    np.testing.assert_allclose(fn(x_test.values, y_test.values), 5.0)
+
+
 def test_dimension_alignment():
     x = xtensor("x", dims=("city", "country", "planet"), shape=(2, 3, 4))
     y = xtensor(
@@ -95,3 +108,21 @@ def test_multiple_constant():
     res = fn(x_test)
     expected_res = np.exp(x_test * 2) + 2
     np.testing.assert_allclose(res, expected_res)
+
+
+def test_cast():
+    x = xtensor("x", shape=(2, 3), dims=("a", "b"), dtype="float32")
+    yf64 = x.astype("float64")
+    yi16 = x.astype("int16")
+    ybool = x.astype("bool")
+
+    fn = xr_function([x], [yf64, yi16, ybool])
+    x_test = xr_arange_like(x)
+    res_f64, res_i16, res_bool = fn(x_test)
+    xr_assert_allclose(res_f64, x_test.astype("float64"))
+    xr_assert_allclose(res_i16, x_test.astype("int16"))
+    xr_assert_allclose(res_bool, x_test.astype("bool"))
+
+    yc64 = x.astype("complex64")
+    with pytest.raises(TypeError, match="Casting from complex to real is ambiguous"):
+        yc64.astype("float64")