-
Notifications
You must be signed in to change notification settings - Fork 133
Implement indexing operations for XTensorVariables #1429
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
ricardoV94
merged 5 commits into
pymc-devs:labeled_tensors
from
ricardoV94:labeled_indexing
Jun 3, 2025
+854
−5
Merged
Changes from all commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
9169681
Fix scalar case in XElemwise
ricardoV94 4f67209
Check shape length matches dims in XTensorType
ricardoV94 09ef4a7
Fix bug in `xtensor_constant`
ricardoV94 1450796
Implement casting for XTensorVariables
ricardoV94 cdaa917
Implement index operations for XTensorVariables
ricardoV94 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,186 @@ | ||
# HERE LIE DRAGONS | ||
# Useful links to make sense of all the numpy/xarray complexity | ||
# https://numpy.org/devdocs//user/basics.indexing.html | ||
# https://numpy.org/neps/nep-0021-advanced-indexing.html | ||
# https://docs.xarray.dev/en/latest/user-guide/indexing.html | ||
# https://tutorial.xarray.dev/intermediate/indexing/advanced-indexing.html | ||
|
||
from pytensor.graph.basic import Apply, Constant, Variable | ||
from pytensor.scalar.basic import discrete_dtypes | ||
from pytensor.tensor.basic import as_tensor | ||
from pytensor.tensor.type_other import NoneTypeT, SliceType, make_slice | ||
from pytensor.xtensor.basic import XOp, xtensor_from_tensor | ||
from pytensor.xtensor.type import XTensorType, as_xtensor, xtensor | ||
|
||
|
||
def as_idx_variable(idx, indexed_dim: str):
    """Normalize a single user-provided index into a symbolic index variable.

    Parameters
    ----------
    idx
        The raw index: a Python ``slice``, a symbolic slice variable, a
        ``(dim_or_dims, array)`` pair, or anything that can be converted to an
        integer / boolean (x)tensor.
    indexed_dim
        Name of the dimension of the indexed array that ``idx`` applies to.
        It is used to label unlabeled indexers and to validate boolean masks.

    Returns
    -------
    A slice variable (for slice inputs) or an XTensorVariable with a discrete
    dtype. Boolean masks are converted to the integer positions of their
    nonzero entries.

    Raises
    ------
    TypeError
        If ``idx`` is ``None`` (or a symbolic None), or a numerical index whose
        dtype is neither integer nor boolean.
    IndexError
        If a dimension name is paired with an XTensorVariable, if an unlabeled
        index has more than one dimension, or if a boolean mask is labeled with
        a dimension other than ``indexed_dim``.
    NotImplementedError
        If a boolean index is not 1-dimensional.
    """
    if idx is None or (isinstance(idx, Variable) and isinstance(idx.type, NoneTypeT)):
        raise TypeError(
            "XTensors do not support indexing with None (np.newaxis), use expand_dims instead"
        )

    # Slices: Python slices become symbolic, symbolic slices pass through untouched
    if isinstance(idx, slice):
        return make_slice(idx)
    if isinstance(idx, Variable) and isinstance(idx.type, SliceType):
        return idx

    # Special ("x", array) / (("x", "y"), array) form that xarray supports
    if isinstance(idx, tuple) and len(idx) == 2:
        label, values = idx
        single_label = isinstance(label, str)
        if single_label or (
            isinstance(label, tuple | list)
            and all(isinstance(name, str) for name in label)
        ):
            if isinstance(values, Variable) and isinstance(values.type, XTensorType):
                raise IndexError(
                    f"Giving a dimension name to an XTensorVariable indexer is not supported: {(label, values)}. "
                    "Use .rename() instead."
                )
            labels = (label,) if single_label else tuple(label)
            return as_xtensor(as_tensor(values), dims=labels)

    # Anything left must be an integer / boolean index;
    # None and slices were already accounted for above
    try:
        indexer = as_xtensor(idx)
    except TypeError:
        unlabeled = as_tensor(idx)
        if unlabeled.type.ndim > 1:
            # Same error that xarray raises
            raise IndexError(
                "Unlabeled multi-dimensional array cannot be used for indexing"
            )
        # An unlabeled index implicitly lives on the dimension being indexed
        # (a scalar gets no dims at all, hence the slicing of the 1-tuple)
        indexer = xtensor_from_tensor(
            unlabeled, dims=(indexed_dim,)[: unlabeled.type.ndim]
        )

    dtype = indexer.type.dtype
    if dtype == "bool":
        if indexer.type.ndim != 1:
            # xarray allows `x[True]`, but I think it is a bug: https://github.com/pydata/xarray/issues/10379
            # Otherwise, it is always restricted to 1d boolean indexing arrays
            raise NotImplementedError(
                "Only 1d boolean indexing arrays are supported"
            )
        if indexer.type.dims != (indexed_dim,):
            raise IndexError(
                "Boolean indexer should be unlabeled or on the same dimension to the indexed array. "
                f"Indexer is on {indexer.type.dims} but the target dimension is {indexed_dim}."
            )
        # Replace the mask by the integer positions of its True entries
        return as_xtensor(indexer.values.nonzero()[0], dims=indexer.type.dims)

    if dtype not in discrete_dtypes:
        raise TypeError("Numerical indices must be integers or boolean")
    return indexer
|
||
|
||
def get_static_slice_length(slc: Variable, dim_length: None | int) -> int | None:
    """Return the statically known number of elements selected by a slice.

    Parameters
    ----------
    slc
        Symbolic slice variable applied to a dimension.
    dim_length
        Static length of the dimension being sliced, or ``None`` if unknown.

    Returns
    -------
    The length of the sliced dimension, or ``None`` when it cannot be
    determined: the dimension length is unknown, the slice is a root
    (non-constant) variable, or any of its start/stop/step is not a constant.
    """
    if dim_length is None:
        return None

    if isinstance(slc, Constant):
        constant_slice = slc.data
        bounds = [constant_slice.start, constant_slice.stop, constant_slice.step]
    elif slc.owner is None:
        # Root variable: nothing is known about its contents
        return None
    else:
        # Output of a MakeSlice Op: inputs are (start, stop, step)
        start, stop, step = slc.owner.inputs
        bounds = []
        for component in (start, stop, step):
            if not isinstance(component, Constant):
                return None
            bounds.append(component.data)

    # Let Python's own slice machinery resolve negatives / None / clipping
    return len(range(*slice(*bounds).indices(dim_length)))
|
||
|
||
class Index(XOp):
    """Op that indexes an XTensorVariable with slices and labeled indexers.

    ``make_node`` normalizes every index with ``as_idx_variable`` and infers the
    dims and static shape of the output.
    """

    __props__ = ()

    def make_node(self, x, *idxs):
        """Build the Apply node for ``x`` indexed by ``idxs``.

        Parameters
        ----------
        x
            Anything convertible to an XTensorVariable.
        *idxs
            Positional indices, one per leading dimension of ``x``. At most one
            ``Ellipsis`` is allowed; it expands to as many ``slice(None)`` as
            needed to cover the dimensions not explicitly indexed.

        Returns
        -------
        Apply
            Node with inputs ``[x, *normalized_idxs]`` and a single output.
            Output dims appear in the order they are first introduced by the
            indices (scalar indices drop their dimension), followed by the
            dimensions of ``x`` that were not indexed.

        Raises
        ------
        IndexError
            If more than one ``Ellipsis`` is given, if there are more indices
            than dimensions, or if two indexers disagree on the static length
            of a shared dimension. Errors raised while normalizing individual
            indices propagate unchanged.
        """
        x = as_xtensor(x)

        # Locate Ellipsis strictly by identity. `tuple.count` / `tuple.index`
        # fall back to `==`, which is elementwise for array-like indexers
        # (e.g. NumPy arrays) and fails with an ambiguous truth value.
        ellipsis_locs = [loc for loc, idx in enumerate(idxs) if idx is Ellipsis]
        if ellipsis_locs:
            if len(ellipsis_locs) > 1:
                raise IndexError("an index can only have a single ellipsis ('...')")
            # Convert intermediate Ellipsis to slice(None)
            [ellipsis_loc] = ellipsis_locs
            n_implied_none_slices = x.type.ndim - (len(idxs) - 1)
            idxs = (
                *idxs[:ellipsis_loc],
                *((slice(None),) * n_implied_none_slices),
                *idxs[ellipsis_loc + 1 :],
            )

        x_ndim = x.type.ndim
        x_dims = x.type.dims
        x_shape = x.type.shape
        out_dims = []
        out_shape = []

        def combine_dim_info(idx_dim, idx_dim_shape):
            if idx_dim not in out_dims:
                # First information about the dimension length
                out_dims.append(idx_dim)
                out_shape.append(idx_dim_shape)
            else:
                # Dim already introduced in output by a previous index
                # Update static shape or raise if incompatible
                out_dim_pos = out_dims.index(idx_dim)
                out_dim_shape = out_shape[out_dim_pos]
                if out_dim_shape is None:
                    # We don't know the size of the dimension yet
                    out_shape[out_dim_pos] = idx_dim_shape
                elif idx_dim_shape is not None and idx_dim_shape != out_dim_shape:
                    raise IndexError(
                        f"Dimension of indexers mismatch for dim {idx_dim}"
                    )

        if len(idxs) > x_ndim:
            raise IndexError("Too many indices")

        idxs = [
            as_idx_variable(idx, dim) for idx, dim in zip(idxs, x_dims, strict=False)
        ]

        for i, idx in enumerate(idxs):
            if isinstance(idx.type, SliceType):
                # A slice keeps the dimension it acts on, possibly with a new length
                combine_dim_info(x_dims[i], get_static_slice_length(idx, x_shape[i]))
                continue

            if idx.type.ndim == 0:
                # Scalar index, dimension is dropped
                continue

            assert isinstance(idx.type, XTensorType)

            # A labeled indexer contributes each of its own dimensions
            for idx_dim, idx_dim_shape in zip(idx.type.dims, idx.type.shape):
                combine_dim_info(idx_dim, idx_dim_shape)

        # Add back any unindexed dimensions. Use the number of indices rather
        # than the loop variable, which is unbound when no index was given.
        n_indexed = len(idxs)
        for dim_i, shape_i in zip(x_dims[n_indexed:], x_shape[n_indexed:]):
            if dim_i not in out_dims:
                # If the dimension was not indexed, we keep it as is
                combine_dim_info(dim_i, shape_i)

        output = xtensor(dtype=x.type.dtype, shape=out_shape, dims=out_dims)
        return Apply(self, [x, *idxs], [output])


index = Index()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
import pytensor.xtensor.rewriting.basic | ||
import pytensor.xtensor.rewriting.indexing | ||
import pytensor.xtensor.rewriting.reduction | ||
import pytensor.xtensor.rewriting.shape | ||
import pytensor.xtensor.rewriting.vectorization |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,150 @@ | ||
from itertools import zip_longest | ||
|
||
from pytensor import as_symbolic | ||
from pytensor.graph import Constant, node_rewriter | ||
from pytensor.tensor import TensorType, arange, specify_shape | ||
from pytensor.tensor.subtensor import _non_consecutive_adv_indexing | ||
from pytensor.tensor.type_other import NoneTypeT, SliceType | ||
from pytensor.xtensor.basic import tensor_from_xtensor, xtensor_from_tensor | ||
from pytensor.xtensor.indexing import Index | ||
from pytensor.xtensor.rewriting.utils import register_xcanonicalize | ||
from pytensor.xtensor.type import XTensorType | ||
|
||
|
||
def to_basic_idx(idx):
    """Convert an ``Index`` input into an object usable for basic tensor indexing.

    Parameters
    ----------
    idx
        A symbolic slice variable or an XTensorVariable index.

    Returns
    -------
    - For a constant slice: the underlying Python ``slice`` stored in ``idx.data``.
    - For a slice built by an Op: a Python ``slice`` of its inputs, with
      symbolic None entries replaced by a plain ``None``.
    - For a root slice variable: ``idx`` itself.
    - For a 0-d non-boolean XTensorVariable: ``idx.values``.

    Raises
    ------
    TypeError
        If ``idx`` is none of the above (e.g. a non-scalar or boolean indexer).
    """
    if isinstance(idx.type, SliceType):
        if isinstance(idx, Constant):
            return idx.data
        if idx.owner:
            # MakeSlice Op
            # We transform NoneConsts to regular None so that basic Subtensor can be used if possible
            return slice(
                *[
                    None if isinstance(i.type, NoneTypeT) else i
                    for i in idx.owner.inputs
                ]
            )
        return idx

    if (
        isinstance(idx.type, XTensorType)
        and idx.type.ndim == 0
        # Compare against the dtype *name*, as done when indices are validated;
        # comparing a string dtype with the builtin `bool` class is always unequal,
        # which would let boolean scalars through.
        and idx.type.dtype != "bool"
    ):
        return idx.values

    raise TypeError("Cannot convert idx to basic idx")
|
||
|
||
@register_xcanonicalize
@node_rewriter(tracks=[Index])
def lower_index(fgraph, node):
    """Rewrite an ``Index`` node into regular TensorVariable indexing.

    Labeled (xarray-like) indexing mixes two behaviors:

    1. Orthogonal indexing: indexers that live on different output dimensions
       are combined as an outer product of indices.
    2. Vectorized indexing: indexers that share an output dimension are
       combined point-wise, like regular numpy advanced indexing.

    A single ``Index`` node may use both at once. With numpy semantics,
    orthogonal indexing is obtained through multidimensional advanced indexing:
    every indexer is expanded to the number of output dimensions, indexers
    sharing an output dimension are placed on the same axis and the others on
    different axes. For example, an outer 2x2 selection can be written as
    ``x[arange(x.shape[0])[:, None], arange(x.shape[1])[None, :]]``. This
    generalizes ``np.ix_`` by allowing some dimensions to be combined but not
    others, and by allowing indexers that start with more than one dimension.

    Slices need special care. In xarray a slice can be vectorized together with
    advanced indexers acting on the same output dimension, whereas in numpy
    basic indices are always orthogonal to advanced ones. Such slices are
    therefore turned into advanced indices by slicing an ``arange`` of the
    length of the indexed dimension, and are then aligned like any other
    advanced index.

    Note: the inputs of an ``Index`` node are only slices and XTensorVariables;
    other array-like indices are converted by ``Index.make_node``.
    """

    x, *indexers = node.inputs
    [out] = node.outputs
    x_tensor = tensor_from_xtensor(x)

    def is_basic(indexer):
        # Slices and scalar indexers need no alignment at all
        indexer_type = indexer.type
        return isinstance(indexer_type, SliceType) or (
            isinstance(indexer_type, XTensorType) and indexer_type.ndim == 0
        )

    if all(is_basic(indexer) for indexer in indexers):
        # Only basic indexing is involved, index the tensor directly
        basic_key = tuple(to_basic_idx(indexer) for indexer in indexers)
        indexed = x_tensor[basic_key]

    else:
        # General case: align every indexer positionally with the output dims so that
        # numpy advanced indexing reproduces the vectorized / orthogonal behavior
        x_dims = x.type.dims
        x_shape = tuple(x.shape)
        out_dims = out.type.dims
        out_ndim = out.type.ndim
        # Dimensions without an explicit indexer are implicitly fully sliced
        full_slice = as_symbolic(slice(None))
        aligned = []
        kept_slice_axes = []

        for pos, (indexer, x_dim) in enumerate(
            zip_longest(indexers, x_dims, fillvalue=full_slice)
        ):
            if not isinstance(indexer.type, SliceType):
                assert isinstance(indexer.type, XTensorType)
                if indexer.type.ndim == 0:
                    # Scalar indexers can be used as they are
                    aligned.append(indexer.values)
                else:
                    # Place each indexer dimension on the axis of the matching output dim
                    order = ["x"] * out_ndim
                    for src_axis, indexer_dim in enumerate(indexer.dims):
                        order[out_dims.index(indexer_dim)] = src_axis
                    aligned.append(indexer.values.dimshuffle(order))
                continue

            shares_dim_with_adv_index = any(
                isinstance(other.type, XTensorType) and x_dim in other.dims
                for other_pos, other in enumerate(indexers)
                if other_pos != pos
            )
            if shares_dim_with_adv_index:
                # The slice must interact point-wise with another indexer:
                # express it as an equivalent advanced index and align it
                as_advanced = arange(x_shape[pos])[to_basic_idx(indexer)]
                order = ["x"] * out_ndim
                order[out_dims.index(x_dim)] = 0
                aligned.append(as_advanced.dimshuffle(order))
            else:
                # No other indexer touches this dimension, keep the basic slice.
                # This avoids building an arange and keeps the door open for
                # the specialized AdvancedSubtensor1 when possible
                aligned.append(indexer)
                kept_slice_axes.append(out_dims.index(x_dim))

        if kept_slice_axes:
            # Axes covered by basic slices were not used by the aligned advanced indices
            aligned = [
                (
                    entry.squeeze(axis=kept_slice_axes)
                    if (isinstance(entry.type, TensorType) and entry.type.ndim > 0)
                    else entry
                )
                for entry in aligned
            ]

        indexed = x_tensor[tuple(aligned)]

        if kept_slice_axes and _non_consecutive_adv_indexing(aligned):
            # Numpy moves non-consecutive advanced indexing dimensions to the front;
            # transpose so the result follows the expected output order again
            sliced_dims = [out_dims[axis] for axis in kept_slice_axes]
            current_dims = [
                dim for dim in out_dims if dim not in sliced_dims
            ] + sliced_dims
            indexed = indexed.transpose([current_dims.index(dim) for dim in out_dims])

    # Restore the static shape information known to the Index output
    indexed = specify_shape(indexed, out.type.shape)
    return [xtensor_from_tensor(indexed, dims=out.type.dims)]
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.