@final
def _maybe_mask_setitem_value(self, indexer, value):
    """
    Mask the RHS of ``obj.loc[mask, col] = series_or_frame`` like the LHS.

    If we have ``obj.loc[mask] = series_or_frame`` and ``series_or_frame``
    has the same length as the boolean mask, we treat this as
    ``obj.loc[mask] = series_or_frame[mask]``, similar to
    ``Series.__setitem__``.

    Note this is only for loc, not iloc.

    Parameters
    ----------
    indexer : object
        Either a single indexer, or a 2-tuple ``(row_indexer, col_indexer)``.
    value : object
        The value being set. Only Series/DataFrame values are rewritten.

    Returns
    -------
    tuple
        ``(indexer, value)``. Three outcomes are possible:

        * A bare boolean ``indexer`` (or any indexer that is not handled as
          a 2-tuple with a Series/DataFrame value) is replaced by
          ``indexer.nonzero()[0]`` when it is a boolean indexer, otherwise
          returned untouched; ``value`` is returned untouched.
        * For a handled 2-tuple whose mask selects nothing, ``indexer`` is
          returned unchanged and ``value`` becomes ``value.iloc[:0]``.
        * For a handled 2-tuple whose mask selects something, ``value`` is
          the result of ``self.obj.iloc._align_series`` (Series RHS) or
          ``self.obj.iloc._align_frame`` (one-column DataFrame RHS) called
          with the *original* indexer, and the row mask in ``indexer`` is
          replaced by the positions where it is True.
    """
    if not (
        isinstance(indexer, tuple)
        and len(indexer) == 2
        and isinstance(value, (ABCSeries, ABCDataFrame))
    ):
        if com.is_bool_indexer(indexer):
            indexer = indexer.nonzero()[0]
        return indexer, value

    pi, icols = indexer
    ndim = value.ndim
    if not (com.is_bool_indexer(pi) and len(value) == len(pi)):
        return indexer, value

    newkey = pi.nonzero()[0]

    # Decide which aligner applies; anything that matches neither shape is
    # passed through untouched.
    if is_scalar_indexer(icols, self.ndim - 1) and ndim == 1:
        # e.g. test_loc_setitem_boolean_mask_allfalse
        rhs_is_frame = False
    elif (
        isinstance(icols, np.ndarray)
        and icols.dtype.kind == "i"
        and len(icols) == 1
    ):
        if ndim == 1:
            # We implicitly broadcast, though numpy does not, see
            # github.com/pandas-dev/pandas/pull/45501#discussion_r789071825
            rhs_is_frame = False
        elif ndim == 2 and value.shape[1] == 1:
            rhs_is_frame = True
        else:
            return indexer, value
    else:
        return indexer, value

    if len(newkey) == 0:
        # FIXME: kludge for test_loc_setitem_boolean_mask_allfalse,
        # test_setitem_loc_only_false_indexer_dtype_changed and
        # test_loc_setitem_all_false_boolean_two_blocks
        # TODO(GH#45333): may be fixed when deprecation is enforced
        # The indexer is deliberately left as the (all-False) mask here.
        return indexer, value.iloc[:0]

    # test_loc_setitem_ndframe_values_alignment
    iloc = self.obj.iloc
    align = iloc._align_frame if rhs_is_frame else iloc._align_series
    # The aligner receives the original indexer (still holding the mask);
    # only afterwards is the mask swapped for integer positions.
    value = align(indexer, value)
    return (newkey, icols), value
have to operate column-wise diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 3d4f53530b89c..0cdd6425c2657 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -932,7 +932,7 @@ def setitem(self, indexer, value): if is_empty_indexer(indexer): # GH#8669 empty indexers, test_loc_setitem_boolean_mask_allfalse - pass + values[indexer] = value elif is_scalar_indexer(indexer, self.ndim): # setting a single element for each dim and with a rhs that could diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 4b751fa7d5e3e..73beb04fca81f 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2446,6 +2446,31 @@ def test_loc_setitem_boolean_and_column(self, float_frame): tm.assert_frame_equal(float_frame, expected) + def test_loc_setitem_ndframe_values_alignment(self): + # GH#45501 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df.loc[[False, False, True], ["a"]] = DataFrame( + {"a": [10, 20, 30]}, index=[2, 1, 0] + ) + + expected = DataFrame({"a": [1, 2, 10], "b": [4, 5, 6]}) + tm.assert_frame_equal(df, expected) + + # same thing with Series RHS + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df.loc[[False, False, True], ["a"]] = Series([10, 11, 12], index=[2, 1, 0]) + tm.assert_frame_equal(df, expected) + + # same thing but setting "a" instead of ["a"] + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df.loc[[False, False, True], "a"] = Series([10, 11, 12], index=[2, 1, 0]) + tm.assert_frame_equal(df, expected) + + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + ser = df["a"] + ser.loc[[False, False, True]] = Series([10, 11, 12], index=[2, 1, 0]) + tm.assert_frame_equal(df, expected) + class TestLocListlike: @pytest.mark.parametrize("box", [lambda x: x, np.asarray, list])