From e2b809a5ea4362faa0f2ab2d5f1a33d2f9ee68f6 Mon Sep 17 00:00:00 2001 From: Matias Lindgren Date: Sat, 27 Jul 2024 15:05:04 +0200 Subject: [PATCH 1/7] Revert "CLN: Remove special cases in indexing ops (#52063)" This reverts commit 8e456d3599541dc1a7fe7ec742274774f768f97d. --- pandas/core/indexing.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 455e61b8bc254..2b41768529251 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -810,8 +810,14 @@ def _maybe_mask_setitem_value(self, indexer, value): if is_scalar_indexer(icols, self.ndim - 1) and ndim == 1: # e.g. test_loc_setitem_boolean_mask_allfalse - # test_loc_setitem_ndframe_values_alignment - value = self.obj.iloc._align_series(indexer, value) + if len(newkey) == 0: + # FIXME: kludge for test_loc_setitem_boolean_mask_allfalse + # TODO(GH#45333): may be fixed when deprecation is enforced + + value = value.iloc[:0] + else: + # test_loc_setitem_ndframe_values_alignment + value = self.obj.iloc._align_series(indexer, value) indexer = (newkey, icols) elif ( @@ -827,8 +833,14 @@ def _maybe_mask_setitem_value(self, indexer, value): indexer = (newkey, icols) elif ndim == 2 and value.shape[1] == 1: - # test_loc_setitem_ndframe_values_alignment - value = self.obj.iloc._align_frame(indexer, value) + if len(newkey) == 0: + # FIXME: kludge for + # test_loc_setitem_all_false_boolean_two_blocks + # TODO(GH#45333): may be fixed when deprecation is enforced + value = value.iloc[:0] + else: + # test_loc_setitem_ndframe_values_alignment + value = self.obj.iloc._align_frame(indexer, value) indexer = (newkey, icols) elif com.is_bool_indexer(indexer): indexer = indexer.nonzero()[0] @@ -2389,7 +2401,7 @@ def ravel(i): new_ix = Index([new_ix]) else: new_ix = Index(new_ix) - if ser.index.equals(new_ix): + if ser.index.equals(new_ix) or not len(new_ix): if using_cow: return ser return ser._values.copy() From cc06fb0e9c0eb461a7f59ff04b2db047a6ea2bea Mon Sep 17 00:00:00 2001 From: Matias Lindgren Date: Sat, 27 Jul 2024 15:16:16 +0200 Subject: [PATCH 2/7] remove old comments, add test --- pandas/core/indexing.py | 6 ------ pandas/tests/indexing/test_loc.py | 9 +++++++-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 2b41768529251..9bd5f388fb11c 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -811,9 +811,6 @@ def _maybe_mask_setitem_value(self, indexer, value): if is_scalar_indexer(icols, self.ndim - 1) and ndim == 1: # e.g. test_loc_setitem_boolean_mask_allfalse if len(newkey) == 0: - # FIXME: kludge for test_loc_setitem_boolean_mask_allfalse - # TODO(GH#45333): may be fixed when deprecation is enforced - value = value.iloc[:0] else: # test_loc_setitem_ndframe_values_alignment @@ -834,9 +831,6 @@ def _maybe_mask_setitem_value(self, indexer, value): elif ndim == 2 and value.shape[1] == 1: if len(newkey) == 0: - # FIXME: kludge for - # test_loc_setitem_all_false_boolean_two_blocks - # TODO(GH#45333): may be fixed when deprecation is enforced value = value.iloc[:0] else: # test_loc_setitem_ndframe_values_alignment diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 72cda194bec53..7ee9a9e989172 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1747,7 +1747,7 @@ def test_loc_setitem_multiindex_slice(self): ) result = Series([1, 1, 1, 1, 1, 1, 1, 1], index=index) - result.loc[("baz", "one") : ("foo", "two")] = 100 + result.loc[("baz", "one"):("foo", "two")] = 100 expected = Series([1, 1, 100, 100, 100, 100, 1, 1], index=index) @@ -2764,7 +2764,7 @@ def test_loc_axis_1_slice(): index=tuple("ABCDEFGHIJ"), columns=MultiIndex.from_tuples(cols), ) - result = df.loc(axis=1)[(2014, 9) : (2015, 8)] + result = df.loc(axis=1)[(2014, 9):(2015, 8)] expected = DataFrame( np.ones((10, 4)), index=tuple("ABCDEFGHIJ"), @@ -3272,3 +3272,8 @@ def test_loc_index_alignment_for_series(self): df.loc[:, "a"] = other expected = DataFrame({"a": [999, 200], "b": [3, 4]}) tm.assert_frame_equal(expected, df) + + def test_loc_assign_to_should_not_raise(self): + # GH 57735 + df = DataFrame(index=[1, 1, 2, 2], data=["1", "1", "2", "2"]) + df.loc[df[0].str.len() > 1, 0] = df[0] From efba5a55b6cd9276166bbaf8851b0596e506e4cc Mon Sep 17 00:00:00 2001 From: Matias Lindgren Date: Sat, 27 Jul 2024 19:25:47 +0200 Subject: [PATCH 3/7] use better test name --- pandas/tests/indexing/test_loc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 7ee9a9e989172..12bf012c06935 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -3273,7 +3273,7 @@ def test_loc_index_alignment_for_series(self): expected = DataFrame({"a": [999, 200], "b": [3, 4]}) tm.assert_frame_equal(expected, df) - def test_loc_assign_to_should_not_raise(self): + def test_loc_reindexing_of_empty_index(self): # GH 57735 df = DataFrame(index=[1, 1, 2, 2], data=["1", "1", "2", "2"]) df.loc[df[0].str.len() > 1, 0] = df[0] From 7f937d802171265c132433d93d288ab7b5c89944 Mon Sep 17 00:00:00 2001 From: matiaslindgren Date: Mon, 29 Jul 2024 20:42:48 +0200 Subject: [PATCH 4/7] Update pandas/tests/indexing/test_loc.py Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/tests/indexing/test_loc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 12bf012c06935..67c937e371a4e 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -3276,4 +3276,4 @@ def test_loc_index_alignment_for_series(self): def test_loc_reindexing_of_empty_index(self): # GH 57735 df = DataFrame(index=[1, 1, 2, 2], data=["1", "1", "2", "2"]) - df.loc[df[0].str.len() > 1, 0] = df[0] + df.loc[Series([False] * 4], index=df.index, name=0), 0] = df[0] From 20e21f0b6db183c1ca4dc4a882b86885368f2260 Mon Sep 17 00:00:00 2001 From: Matias Lindgren Date: Mon, 29 Jul 2024 20:42:39 +0200 Subject: [PATCH 5/7] check for empty index first --- pandas/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 9bd5f388fb11c..debb5bdd4fc4b 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2395,7 +2395,7 @@ def ravel(i): new_ix = Index([new_ix]) else: new_ix = Index(new_ix) - if ser.index.equals(new_ix) or not len(new_ix): + if not len(new_ix) or ser.index.equals(new_ix): if using_cow: return ser return ser._values.copy() From 78ccf15bb9e2d4bed7eccf06434442a306b21cdf Mon Sep 17 00:00:00 2001 From: Matias Lindgren Date: Mon, 29 Jul 2024 20:44:46 +0200 Subject: [PATCH 6/7] assert assign to empty does not change frame --- pandas/tests/indexing/test_loc.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 67c937e371a4e..f12b7fdf3560c 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -3276,4 +3276,6 @@ def test_loc_index_alignment_for_series(self): def test_loc_reindexing_of_empty_index(self): # GH 57735 df = DataFrame(index=[1, 1, 2, 2], data=["1", "1", "2", "2"]) - df.loc[Series([False] * 4], index=df.index, name=0), 0] = df[0] + df.loc[Series([False] * 4, index=df.index, name=0), 0] = df[0] + expected = DataFrame(index=[1, 1, 2, 2], data=["1", "1", "2", "2"]) + tm.assert_frame_equal(df, expected) From c9e78a75016473dbda0bd4781ce046550af7d2ef Mon Sep 17 00:00:00 2001 From: Matias Lindgren Date: Mon, 29 Jul 2024 20:50:21 +0200 Subject: [PATCH 7/7] format --- pandas/tests/indexing/test_loc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index f12b7fdf3560c..f90bd9e6802c8 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1747,7 +1747,7 @@ def test_loc_setitem_multiindex_slice(self): ) result = Series([1, 1, 1, 1, 1, 1, 1, 1], index=index) - result.loc[("baz", "one"):("foo", "two")] = 100 + result.loc[("baz", "one") : ("foo", "two")] = 100 expected = Series([1, 1, 100, 100, 100, 100, 1, 1], index=index) @@ -2764,7 +2764,7 @@ def test_loc_axis_1_slice(): index=tuple("ABCDEFGHIJ"), columns=MultiIndex.from_tuples(cols), ) - result = df.loc(axis=1)[(2014, 9):(2015, 8)] + result = df.loc(axis=1)[(2014, 9) : (2015, 8)] expected = DataFrame( np.ones((10, 4)), index=tuple("ABCDEFGHIJ"),