-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
REF: mask values in loc.__setitem__ with bool indexer #45501
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
d7b22f3
9e73633
92a7e3a
0c4d64e
f237aac
94c6761
b91fc34
46f0f22
5b62674
65ec7d8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -53,6 +53,7 @@ | |
is_empty_indexer, | ||
is_exact_shape_match, | ||
is_list_like_indexer, | ||
is_scalar_indexer, | ||
length_of_indexer, | ||
) | ||
from pandas.core.indexes.api import ( | ||
|
@@ -669,6 +670,56 @@ def _get_setitem_indexer(self, key): | |
|
||
return self._convert_to_indexer(key, axis=0) | ||
|
||
@final | ||
def _maybe_mask_setitem_value(self, indexer, value): | ||
""" | ||
If we have obj.iloc[mask] = arraylike and arraylike has the same | ||
length as obj, we treat this as obj.iloc[mask] = arraylike[mask], | ||
similar to Series.__setitem__. | ||
""" | ||
# TODO: why do we only do this for loc, not iloc? | ||
# ser = pd.Series(range(5)) | ||
# mask = np.array([True, False, True, False, True]) | ||
# ser[mask] = ser * 2 # <- works | ||
# ser.loc[mask] = ser * 2 # <- works | ||
# ser.iloc[mask] = ser * 2 # <- raises | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. loc and setitem align, while iloc does not. Hence this should not work for iloc. This was changed for 1.2 I think and was intended |
||
|
||
if isinstance(indexer, tuple) and len(indexer) == 2: | ||
pi, icols = indexer | ||
ndim = getattr(value, "ndim", 0) | ||
if ndim == 0: | ||
pass | ||
elif com.is_bool_indexer(pi) and len(value) == len(pi): | ||
newkey = pi.nonzero()[0] | ||
|
||
if is_scalar_indexer(icols, self.ndim - 1) and ndim == 1: | ||
# e.g. test_loc_setitem_boolean_mask_allfalse | ||
value = value[pi] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as for the iloc case below
Should also set 10 |
||
indexer = (newkey, icols) | ||
|
||
elif ( | ||
isinstance(icols, np.ndarray) | ||
and icols.dtype.kind == "i" | ||
and len(icols) == 1 | ||
): | ||
if ndim == 1: | ||
value = value[pi] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same here as with iloc below.
This should set 10 instead of NaN |
||
indexer = (newkey, icols) | ||
# TODO: is it sketchy that icols is an ndarray and value | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is ok, something like
is valid and runs through there There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i guess. still seems weird as the analogous behavior with ndarray doesn't work
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is odd yes |
||
# is 1D? i.e. should we require value to be 2D with | ||
# value.shape[1] == 1? | ||
|
||
elif ndim == 2 and value.shape[1] == 1: | ||
if isinstance(value, ABCDataFrame): | ||
value = value.iloc[newkey] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This causes bugs:
This returns
on main, which is correct and
on this branch, which is wrong There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This has to be aligned before applying iloc There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good catch, will update+test There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so using align_series/align_frame fixes the examples you've provided, but breaks test_loc_setitem_boolean_mask_allfalse, test_setitem_loc_only_false_indexer_dtype_changed, test_loc_setitem_all_false_boolean_two_blocks, all cases where mask.sum() == 0. ATM i can get the tests passing by putting in a check specific to that case, but its definitely a kludge There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hm, it looks like the align functions can not deal with all False boolean indexers if I understand this correctly. This looks like a bug in there... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If you set the indexer to something like |
||
else: | ||
value = value[pi] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ´This case is for arrays? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes |
||
indexer = (newkey, icols) | ||
elif com.is_bool_indexer(indexer): | ||
indexer = indexer.nonzero()[0] | ||
|
||
return indexer, value | ||
|
||
@final | ||
def _tupleize_axis_indexer(self, key) -> tuple: | ||
""" | ||
|
@@ -727,6 +778,10 @@ def __setitem__(self, key, value): | |
indexer = self._get_setitem_indexer(key) | ||
self._has_valid_setitem_indexer(key) | ||
|
||
if self.name == "loc": | ||
# TODO: should we do this for iloc too? | ||
indexer, value = self._maybe_mask_setitem_value(indexer, value) | ||
|
||
iloc = self if self.name == "iloc" else self.obj.iloc | ||
iloc._setitem_with_indexer(indexer, value, self.name) | ||
|
||
|
@@ -1299,8 +1354,7 @@ def _convert_to_indexer(self, key, axis: int): | |
|
||
if com.is_bool_indexer(key): | ||
key = check_bool_indexer(labels, key) | ||
(inds,) = key.nonzero() | ||
return inds | ||
return key | ||
else: | ||
return self._get_listlike_indexer(key, axis)[1] | ||
else: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you mean loc here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think arraylike is not correct, we do this only when the rhs has an index. We align not apply the mask too
Raises, which is what I would have expected
I think this should continue to raise