From 4b6a11d63cadab528e2812311068400cbf0ef097 Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 3 May 2021 21:06:55 +0200 Subject: [PATCH 1/2] Bug in iloc.setitem orienting IntegerArray into the wrong direction --- pandas/core/indexers.py | 2 +- pandas/core/internals/blocks.py | 7 +------ pandas/tests/frame/indexing/test_setitem.py | 20 ++++++++++++++------ pandas/tests/indexing/test_iloc.py | 6 +++++- 4 files changed, 21 insertions(+), 14 deletions(-) diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index d3756d6252c0a..4f3f536cd3290 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -164,7 +164,7 @@ def check_setitem_lengths(indexer, value, values) -> bool: # a) not necessarily 1-D indexers, e.g. tuple # b) boolean indexers e.g. BoolArray if is_list_like(value): - if len(indexer) != len(value): + if len(indexer) != len(value) and values.ndim == 1: # boolean with truth values == len of the value is ok too if not ( isinstance(indexer, np.ndarray) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 61396fdf372d5..d87e77043a713 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -970,12 +970,7 @@ def setitem(self, indexer, value): values[indexer] = value elif is_ea_value: - # GH#38952 - if values.ndim == 1: - values[indexer] = value - else: - # TODO(EA2D): special case not needed with 2D EA - values[indexer] = value.to_numpy(values.dtype).reshape(-1, 1) + values[indexer] = value else: # error: Argument 1 to "setitem_datetimelike_compat" has incompatible type diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 4004e595c832f..6c8f56f24356e 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -31,6 +31,7 @@ PeriodIndex, Series, Timestamp, + array, cut, date_range, notna, @@ -792,22 +793,29 @@ def test_setitem_slice_position(self): tm.assert_frame_equal(df, expected) @pytest.mark.parametrize("indexer", [tm.setitem, tm.iloc]) - @pytest.mark.parametrize("box", [Series, np.array, list]) + @pytest.mark.parametrize("box", [Series, np.array, list, array]) @pytest.mark.parametrize("n", [1, 2, 3]) - def test_setitem_broadcasting_rhs(self, n, box, indexer): + def test_setitem_slice_indexer_broadcasting_rhs(self, n, box, indexer): # GH#40440 - # TODO: Add pandas array as box after GH#40933 is fixed df = DataFrame([[1, 3, 5]] + [[2, 4, 6]] * n, columns=["a", "b", "c"]) indexer(df)[1:] = box([10, 11, 12]) expected = DataFrame([[1, 3, 5]] + [[10, 11, 12]] * n, columns=["a", "b", "c"]) tm.assert_frame_equal(df, expected) + @pytest.mark.parametrize("box", [Series, np.array, list, array]) + @pytest.mark.parametrize("n", [1, 2, 3]) + def test_setitem_list_indexer_broadcasting_rhs(self, n, box): + # GH#40440 + df = DataFrame([[1, 3, 5]] + [[2, 4, 6]] * n, columns=["a", "b", "c"]) + df.iloc[list(range(1, n + 1))] = box([10, 11, 12]) + expected = DataFrame([[1, 3, 5]] + [[10, 11, 12]] * n, columns=["a", "b", "c"]) + tm.assert_frame_equal(df, expected) + @pytest.mark.parametrize("indexer", [tm.setitem, tm.iloc]) - @pytest.mark.parametrize("box", [Series, np.array, list]) + @pytest.mark.parametrize("box", [Series, np.array, list, array]) @pytest.mark.parametrize("n", [1, 2, 3]) - def test_setitem_broadcasting_rhs_mixed_dtypes(self, n, box, indexer): + def test_setitem_slice_broadcasting_rhs_mixed_dtypes(self, n, box, indexer): # GH#40440 - # TODO: Add pandas array as box after GH#40933 is fixed df = DataFrame( [[1, 3, 5], ["x", "y", "z"]] + [[2, 4, 6]] * n, columns=["a", "b", "c"] ) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index ad0d4245d58c3..446b616111e9e 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -122,7 +122,11 @@ def test_iloc_setitem_ea_inplace(self, frame_or_series, box, using_array_manager else: values = obj[0].values - obj.iloc[:2] = box(arr[2:]) + if frame_or_series is Series: + obj.iloc[:2] = box(arr[2:]) + else: + obj.iloc[:2, 0] = box(arr[2:]) + expected = frame_or_series(np.array([3, 4, 3, 4], dtype="i8")) tm.assert_equal(obj, expected) From 940fa5e351e81befbc4965d32ee36e234cb47f2a Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 4 May 2021 22:44:46 +0200 Subject: [PATCH 2/2] Use pd.array --- pandas/tests/frame/indexing/test_setitem.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 6c8f56f24356e..f46ecf61138b1 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -18,6 +18,7 @@ PeriodDtype, ) +import pandas as pd from pandas import ( Categorical, DataFrame, @@ -31,7 +32,6 @@ PeriodIndex, Series, Timestamp, - array, cut, date_range, notna, @@ -793,7 +793,7 @@ def test_setitem_slice_position(self): tm.assert_frame_equal(df, expected) @pytest.mark.parametrize("indexer", [tm.setitem, tm.iloc]) - @pytest.mark.parametrize("box", [Series, np.array, list, array]) + @pytest.mark.parametrize("box", [Series, np.array, list, pd.array]) @pytest.mark.parametrize("n", [1, 2, 3]) def test_setitem_slice_indexer_broadcasting_rhs(self, n, box, indexer): # GH#40440 @@ -802,7 +802,7 @@ def test_setitem_slice_indexer_broadcasting_rhs(self, n, box, indexer): expected = DataFrame([[1, 3, 5]] + [[10, 11, 12]] * n, columns=["a", "b", "c"]) tm.assert_frame_equal(df, expected) - @pytest.mark.parametrize("box", [Series, np.array, list, array]) + @pytest.mark.parametrize("box", [Series, np.array, list, pd.array]) @pytest.mark.parametrize("n", [1, 2, 3]) def test_setitem_list_indexer_broadcasting_rhs(self, n, box): # GH#40440 @@ -812,7 +812,7 @@ def test_setitem_list_indexer_broadcasting_rhs(self, n, box): tm.assert_frame_equal(df, expected) @pytest.mark.parametrize("indexer", [tm.setitem, tm.iloc]) - @pytest.mark.parametrize("box", [Series, np.array, list, array]) + @pytest.mark.parametrize("box", [Series, np.array, list, pd.array]) @pytest.mark.parametrize("n", [1, 2, 3]) def test_setitem_slice_broadcasting_rhs_mixed_dtypes(self, n, box, indexer): # GH#40440