From f57718ee3d777047b840c3618b6203d8c38d668f Mon Sep 17 00:00:00 2001 From: yopark Date: Sun, 18 Apr 2021 21:14:02 -0400 Subject: [PATCH 1/2] added test case for iloc function if it returns the same output for both numpy and pandas arrays to fix the issue #40933 --- pandas/tests/indexing/multiindex/test_iloc.py | 19 +++++++++++++++++++ pip | 1 + 2 files changed, 20 insertions(+) create mode 160000 pip diff --git a/pandas/tests/indexing/multiindex/test_iloc.py b/pandas/tests/indexing/multiindex/test_iloc.py index db91d5ad88252..9326cc3992328 100644 --- a/pandas/tests/indexing/multiindex/test_iloc.py +++ b/pandas/tests/indexing/multiindex/test_iloc.py @@ -1,6 +1,7 @@ import numpy as np import pytest +import pandas as pd from pandas import ( DataFrame, MultiIndex, @@ -73,6 +74,24 @@ def test_iloc_getitem_multiple_items(): tm.assert_frame_equal(result, expected) +def test_iloc_np_and_pd(): + # test if iloc returns the same output for numpy array input and integer array input + df = DataFrame( + data={ + "col1": [1, 2, 3, 4], + "col2": [3, 4, 5, 6], + "col3": [6, 7, 8, 9], + } + ) + df_np = df + df_pd = df + np_arr = np.array([1, 2, 3]) + pd_arr = pd.array([1, 2, 3]) + df_np.iloc[[1, 2, 3]] = np_arr + df_pd.iloc[[1, 2, 3]] = pd_arr + tm.assert_frame_equal(df_np, df_pd) + + def test_iloc_getitem_labels(): # this is basically regular indexing arr = np.random.randn(4, 3) diff --git a/pip b/pip new file mode 160000 index 0000000000000..e6a65fc5852b0 --- /dev/null +++ b/pip @@ -0,0 +1 @@ +Subproject commit e6a65fc5852b0237bb588b00e51ea9384b8f23e4 From 4b26e297c6b05ec5c16b463497eaa8645aec5428 Mon Sep 17 00:00:00 2001 From: yopark Date: Fri, 23 Apr 2021 12:49:00 -0400 Subject: [PATCH 2/2] added the value check when setting items with given values for when the value is pandas array. 
--- pandas/core/indexing.py | 8 ++-- pandas/tests/indexing/multiindex/test_iloc.py | 41 ++++++++++++++++--- 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 82971e460a8a2..de053b24de70f 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -44,6 +44,7 @@ isna, ) +from pandas.core.arrays.integer import IntegerArray import pandas.core.common as com from pandas.core.construction import array as pd_array from pandas.core.indexers import ( @@ -1588,10 +1589,11 @@ def _setitem_with_indexer(self, indexer, value, name="iloc"): BlockManager methods, see GH#12991, GH#22046, GH#15686. """ info_axis = self.obj._info_axis_number - # maybe partial set take_split_path = not self.obj._mgr.is_single_block + value = np.array(value) if isinstance(value, IntegerArray) else value + # if there is only one block/type, still have to take split path # unless the block is one-dimensional or it can hold the value if ( @@ -1705,7 +1707,6 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str): """ # Above we only set take_split_path to True for 2D cases assert self.ndim == 2 - if not isinstance(indexer, tuple): indexer = _tuplify(self.ndim, indexer) if len(indexer) > self.ndim: @@ -1716,8 +1717,8 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str): if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict): from pandas import Series + print("value is ABCseries") value = self._align_series(indexer, Series(value)) - # Ensure we have something we can iterate over info_axis = indexer[1] ilocs = self._ensure_iterable_column_indexer(info_axis) @@ -1731,6 +1732,7 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str): if is_list_like_indexer(value) and getattr(value, "ndim", 1) > 0: if isinstance(value, ABCDataFrame): + print("value is ABCDataFrame") self._setitem_with_indexer_frame_value(indexer, value, name) elif np.ndim(value) == 2: 
diff --git a/pandas/tests/indexing/multiindex/test_iloc.py b/pandas/tests/indexing/multiindex/test_iloc.py
index 9326cc3992328..1cfdfa7da9fb8 100644
--- a/pandas/tests/indexing/multiindex/test_iloc.py
+++ b/pandas/tests/indexing/multiindex/test_iloc.py
@@ -74,7 +74,7 @@ def test_iloc_getitem_multiple_items():
     tm.assert_frame_equal(result, expected)
 
 
-def test_iloc_np_and_pd():
+def test_iloc_pd_arr_value():
     # test if iloc returns the same output for numpy array input and integer array input
     df = DataFrame(
         data={
@@ -83,13 +83,42 @@ def test_iloc_np_and_pd():
             "col3": [6, 7, 8, 9],
         }
     )
-    df_np = df
-    df_pd = df
-    np_arr = np.array([1, 2, 3])
+    df_np = df.copy()
+    df_pd = df.copy()
+    df_int = df.copy()
+
     pd_arr = pd.array([1, 2, 3])
-    df_np.iloc[[1, 2, 3]] = np_arr
+    np_arr = np.array([1, 2, 3])
+    int_arr = [1, 2, 3]
     df_pd.iloc[[1, 2, 3]] = pd_arr
-    tm.assert_frame_equal(df_np, df_pd)
+    df_np.iloc[[1, 2, 3]] = np_arr
+    df_int.iloc[[1, 2, 3]] = int_arr
+    tm.assert_frame_equal(df_pd, df_np)
+    tm.assert_frame_equal(df_int, df_np)
+
+
+def test_iloc_pd_arr_value2():
+    # GH#40933: same comparison, but assigning through an explicit (rows, :) indexer
+    df = DataFrame(
+        data={
+            "col1": [1, 2, 3, 4],
+            "col2": [3, 4, 5, 6],
+            "col3": [6, 7, 8, 9],
+        }
+    )
+    df_np = df.copy()
+    df_pd = df.copy()
+    df_int = df.copy()
+
+    pd_arr = pd.array([1, 2, 3])
+    np_arr = np.array([1, 2, 3])
+    int_arr = [1, 2, 3]
+
+    df_pd.iloc[[1, 2, 3], :] = pd_arr
+    df_np.iloc[[1, 2, 3], :] = np_arr
+    df_int.iloc[[1, 2, 3], :] = int_arr
+    tm.assert_frame_equal(df_pd, df_np)
+    tm.assert_frame_equal(df_int, df_np)
 
 
 def test_iloc_getitem_labels():