Skip to content

Commit c64fbce

Browse files
authored
REF: mask values in loc.__setitem__ with bool indexer (#45501)
1 parent 275b187 commit c64fbce

File tree

3 files changed

+97
-3
lines changed

3 files changed

+97
-3
lines changed

pandas/core/indexing.py

+71-2
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
is_empty_indexer,
5555
is_exact_shape_match,
5656
is_list_like_indexer,
57+
is_scalar_indexer,
5758
length_of_indexer,
5859
)
5960
from pandas.core.indexes.api import (
@@ -671,6 +672,71 @@ def _get_setitem_indexer(self, key):
671672

672673
return self._convert_to_indexer(key, axis=0)
673674

675+
@final
676+
def _maybe_mask_setitem_value(self, indexer, value):
677+
"""
678+
If we have obj.loc[mask] = series_or_frame and series_or_frame has the
679+
same length as mask, we treat this as obj.loc[mask] = series_or_frame[mask],
680+
similar to Series.__setitem__.
681+
682+
Note this is only for loc, not iloc.

Returns
-------
indexer, value
    The (possibly converted) indexer and the (possibly masked/aligned) value.
    A boolean ``indexer`` that does not enter the 2-tuple case is replaced
    by ``indexer.nonzero()[0]``. Both are returned unchanged when none of
    the cases below applies.
683+
"""
684+
685+
if (
686+
isinstance(indexer, tuple)
687+
and len(indexer) == 2
688+
and isinstance(value, (ABCSeries, ABCDataFrame))
689+
):
690+
# presumably (row indexer, column indexer) -- confirm against callers
pi, icols = indexer
691+
ndim = value.ndim
692+
if com.is_bool_indexer(pi) and len(value) == len(pi):
693+
# positions at which the boolean row indexer is set
newkey = pi.nonzero()[0]
694+
695+
if is_scalar_indexer(icols, self.ndim - 1) and ndim == 1:
696+
# e.g. test_loc_setitem_boolean_mask_allfalse
697+
if len(newkey) == 0:
698+
# FIXME: kludge for test_loc_setitem_boolean_mask_allfalse
699+
# TODO(GH#45333): may be fixed when deprecation is enforced
700+
701+
value = value.iloc[:0]
702+
else:
703+
# test_loc_setitem_ndframe_values_alignment
704+
value = self.obj.iloc._align_series(indexer, value)
705+
indexer = (newkey, icols)
706+
707+
elif (
708+
isinstance(icols, np.ndarray)
709+
and icols.dtype.kind == "i"
710+
and len(icols) == 1
711+
):
712+
if ndim == 1:
713+
# We implicitly broadcast, though numpy does not, see
714+
# github.com/pandas-dev/pandas/pull/45501#discussion_r789071825
715+
if len(newkey) == 0:
716+
# FIXME: kludge for
717+
# test_setitem_loc_only_false_indexer_dtype_changed
718+
# TODO(GH#45333): may be fixed when deprecation is enforced
719+
value = value.iloc[:0]
720+
else:
721+
# test_loc_setitem_ndframe_values_alignment
722+
value = self.obj.iloc._align_series(indexer, value)
723+
indexer = (newkey, icols)
724+
725+
elif ndim == 2 and value.shape[1] == 1:
726+
if len(newkey) == 0:
727+
# FIXME: kludge for
728+
# test_loc_setitem_all_false_boolean_two_blocks
729+
# TODO(GH#45333): may be fixed when deprecation is enforced
730+
value = value.iloc[:0]
731+
else:
732+
# test_loc_setitem_ndframe_values_alignment
733+
value = self.obj.iloc._align_frame(indexer, value)
734+
indexer = (newkey, icols)
735+
elif com.is_bool_indexer(indexer):
736+
indexer = indexer.nonzero()[0]
737+
738+
return indexer, value
739+
674740
@final
675741
def _tupleize_axis_indexer(self, key) -> tuple:
676742
"""
@@ -1309,8 +1375,7 @@ def _convert_to_indexer(self, key, axis: int):
13091375

13101376
if com.is_bool_indexer(key):
13111377
key = check_bool_indexer(labels, key)
1312-
(inds,) = key.nonzero()
1313-
return inds
1378+
return key
13141379
else:
13151380
return self._get_listlike_indexer(key, axis)[1]
13161381
else:
@@ -1704,6 +1769,10 @@ def _setitem_with_indexer(self, indexer, value, name="iloc"):
17041769
self._setitem_with_indexer_missing(indexer, value)
17051770
return
17061771

1772+
if name == "loc":
1773+
# must come after setting of missing
1774+
indexer, value = self._maybe_mask_setitem_value(indexer, value)
1775+
17071776
# align and set the values
17081777
if take_split_path:
17091778
# We have to operate column-wise

pandas/core/internals/blocks.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -928,7 +928,7 @@ def setitem(self, indexer, value):
928928

929929
if is_empty_indexer(indexer):
930930
# GH#8669 empty indexers, test_loc_setitem_boolean_mask_allfalse
931-
pass
931+
values[indexer] = value
932932

933933
elif is_scalar_indexer(indexer, self.ndim):
934934
# setting a single element for each dim and with a rhs that could

pandas/tests/indexing/test_loc.py

+25
Original file line numberDiff line numberDiff line change
@@ -2463,6 +2463,31 @@ def test_loc_setitem_boolean_and_column(self, float_frame):
24632463

24642464
tm.assert_frame_equal(float_frame, expected)
24652465

2466+
def test_loc_setitem_ndframe_values_alignment(self):
2467+
# GH#45501
# The RHS has the same length as the boolean mask but a reversed index
# ([2, 1, 0]). The expected frame holds 10 in the last row, i.e. the RHS
# entry labelled 2 (its first element), not the entry in last position.
2468+
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
2469+
df.loc[[False, False, True], ["a"]] = DataFrame(
2470+
{"a": [10, 20, 30]}, index=[2, 1, 0]
2471+
)
2472+
2473+
expected = DataFrame({"a": [1, 2, 10], "b": [4, 5, 6]})
2474+
tm.assert_frame_equal(df, expected)
2475+
2476+
# same thing with Series RHS
2477+
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
2478+
df.loc[[False, False, True], ["a"]] = Series([10, 11, 12], index=[2, 1, 0])
2479+
tm.assert_frame_equal(df, expected)
2480+
2481+
# same thing but setting "a" instead of ["a"]
2482+
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
2483+
df.loc[[False, False, True], "a"] = Series([10, 11, 12], index=[2, 1, 0])
2484+
tm.assert_frame_equal(df, expected)
2485+
# same thing but setting through the Series taken from df["a"]; the
# assertion below expects that write to be reflected in df
2486+
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
2487+
ser = df["a"]
2488+
ser.loc[[False, False, True]] = Series([10, 11, 12], index=[2, 1, 0])
2489+
tm.assert_frame_equal(df, expected)
2490+
24662491

24672492
class TestLocListlike:
24682493
@pytest.mark.parametrize("box", [lambda x: x, np.asarray, list])

0 commit comments

Comments
 (0)