From f3762d786fa22cd8401408c08e870c36bc93ee89 Mon Sep 17 00:00:00 2001
From: phofl
Date: Sat, 3 Apr 2021 00:41:49 +0200
Subject: [PATCH 1/3] Bug in loc not ordering rhs correctly for mixed indexer

---
 pandas/core/indexing.py           |  6 +++++-
 pandas/tests/indexing/test_loc.py | 14 +++++++++-----
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 4c8a6a200b196..7d2be2e3c1d59 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1869,7 +1869,11 @@ def _setitem_single_column(self, loc: int, value, plane_indexer):
         if com.is_null_slice(pi) or com.is_full_slice(pi, len(self.obj)):
             ser = value
         elif is_array_like(value) and is_exact_shape_match(ser, value):
-            ser = value
+            if is_list_like(pi):
+                ser = value[np.argsort(pi)]
+            else:
+                # in case of slice
+                ser = value[pi]
         else:
             # set the item, possibly having a dtype change
             ser = ser.copy()
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index 734cf13289c1f..f851e05b573ca 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -602,12 +602,8 @@ def test_loc_setitem_frame_with_reindex(self, using_array_manager):
         expected = DataFrame({"A": ser})
         tm.assert_frame_equal(df, expected)
 
-    @pytest.mark.xfail(reason="split path wrong update - GH40480")
     def test_loc_setitem_frame_with_reindex_mixed(self):
-        # same test as above, but with mixed dataframe
-        # TODO with "split" path we still actually overwrite the column
-        # and therefore don't take the order of the indexer into account
-        # -> this is a bug: https://github.com/pandas-dev/pandas/issues/40480
+        # GH#40480
         df = DataFrame(index=[3, 5, 4], columns=["A", "B"], dtype=float)
         df["B"] = "string"
         df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64")
@@ -616,6 +612,14 @@ def test_loc_setitem_frame_with_reindex_mixed(self):
         expected["B"] = "string"
         tm.assert_frame_equal(df, expected)
 
+    def test_loc_setitem_frame_with_inverted_slice(self):
+        # GH#40480
+        df = DataFrame(index=[1, 2, 3], columns=["A", "B"], dtype=float)
+        df["B"] = "string"
+        df.loc[slice(3, 0, -1), "A"] = np.array([1, 2, 3], dtype="int64")
+        expected = DataFrame({"A": [3, 2, 1], "B": "string"}, index=[1, 2, 3])
+        tm.assert_frame_equal(df, expected)
+
     # TODO(ArrayManager) "split" path overwrites column and therefore don't take
     # the order of the indexer into account
     @td.skip_array_manager_not_yet_implemented

From 9912207a4358ceb41914fe6daee8e696d3f78120 Mon Sep 17 00:00:00 2001
From: phofl
Date: Sat, 10 Apr 2021 00:12:12 +0200
Subject: [PATCH 2/3] Fix failing test

---
 pandas/tests/indexing/test_loc.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index c132cdefc7286..5a619d8ff8e5c 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -598,7 +598,7 @@ def test_loc_setitem_frame_with_reindex(self, using_array_manager):
         if using_array_manager:
             # TODO(ArrayManager) with "split" path, we still overwrite the column
             # and therefore don't take the order of the indexer into account
-            ser = Series([1, 2, 3], index=[3, 5, 4], dtype="int64")
+            ser = Series([2, 3, 1], index=[3, 5, 4], dtype="int64")
         expected = DataFrame({"A": ser})
         tm.assert_frame_equal(df, expected)
 
@@ -621,7 +621,7 @@ def test_loc_setitem_frame_with_inverted_slice(self):
         tm.assert_frame_equal(df, expected)
 
     # TODO(ArrayManager) "split" path overwrites column and therefore don't take
-    # the order of the indexer into account
+    # the dtype of the underlying object into account
     @td.skip_array_manager_not_yet_implemented
     def test_loc_setitem_empty_frame(self):
         # GH#6252 setting with an empty frame

From 75743126bada885bff37321158a8b0ba4ce18913 Mon Sep 17 00:00:00 2001
From: phofl
Date: Sat, 10 Apr 2021 00:14:12 +0200
Subject: [PATCH 3/3] Adjust comment

---
 pandas/tests/indexing/test_loc.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index 5a619d8ff8e5c..97b3412ce626e 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -597,7 +597,7 @@ def test_loc_setitem_frame_with_reindex(self, using_array_manager):
         ser = Series([2, 3, 1], index=[3, 5, 4], dtype=float)
         if using_array_manager:
             # TODO(ArrayManager) with "split" path, we still overwrite the column
-            # and therefore don't take the order of the indexer into account
+            # and therefore don't take the dtype of the underlying object into account
             ser = Series([2, 3, 1], index=[3, 5, 4], dtype="int64")
         expected = DataFrame({"A": ser})
         tm.assert_frame_equal(df, expected)