From 80ef85f602d7848ec913b6b21211e0731080323f Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 10 Dec 2022 21:54:43 +0100 Subject: [PATCH 1/3] BUG: iloc raising for ea series --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/core/indexing.py | 5 ++++- pandas/tests/series/indexing/test_indexing.py | 8 ++++++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index d6e0bb2ae0830..b41768a215b68 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -753,6 +753,7 @@ Indexing - Bug in :meth:`DataFrame.loc` raising ``ValueError`` with ``bool`` indexer and :class:`MultiIndex` (:issue:`47687`) - Bug in :meth:`DataFrame.__setitem__` raising ``ValueError`` when right hand side is :class:`DataFrame` with :class:`MultiIndex` columns (:issue:`49121`) - Bug in :meth:`DataFrame.reindex` casting dtype to ``object`` when :class:`DataFrame` has single extension array column when re-indexing ``columns`` and ``index`` (:issue:`48190`) +- Bug in :meth:`DataFrame.iloc` raising ``IndexError`` when indexer is a :class:`Series` with numeric extension array dtype (:issue:`49521`) - Bug in :func:`~DataFrame.describe` when formatting percentiles in the resulting index showed more decimals than needed (:issue:`46362`) - Bug in :meth:`DataFrame.compare` does not recognize differences when comparing ``NA`` with value in nullable dtypes (:issue:`48939`) - diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 070ec7c7a2e4a..586458f77757a 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1481,7 +1481,10 @@ def _validate_key(self, key, axis: AxisInt): # so don't treat a tuple as a valid indexer raise IndexingError("Too many indexers") elif is_list_like_indexer(key): - arr = np.array(key) + if isinstance(key, ABCSeries): + arr = key._values + else: + arr = np.array(key) len_axis = len(self.obj._get_axis(axis)) # check that the key has a numeric dtype diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 4efc14be3eb7e..b4457611c83e8 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -373,6 +373,14 @@ def test_getitem_bool_int_key(): ser.loc[0] +def test_iloc_ea_series(): + # GH#49521 + df = DataFrame([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]) + result = df.iloc[Series([1], dtype="Int64"), Series([0, 1], dtype="Int64")] + expected = DataFrame([[5, 6]], index=[1]) + tm.assert_frame_equal(result, expected) + + class TestDeprecatedIndexers: @pytest.mark.parametrize("key", [{1}, {1: 1}]) def test_getitem_dict_and_set_deprecated(self, key): From d53f3b4517cc20cc2656e9ebdb5ccbced6ed991c Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 10 Dec 2022 22:09:19 +0100 Subject: [PATCH 2/3] Move test --- pandas/tests/frame/indexing/test_indexing.py | 7 +++++++ pandas/tests/series/indexing/test_indexing.py | 8 -------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 81a5e3d9947be..b881988317a9d 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1437,6 +1437,13 @@ def test_loc_rhs_empty_warning(self): df.loc[:, "a"] = rhs tm.assert_frame_equal(df, expected) + def test_iloc_ea_series_indexer(self): + # GH#49521 + df = DataFrame([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]) + result = df.iloc[Series([1], dtype="Int64"), Series([0, 1], dtype="Int64")] + expected = DataFrame([[5, 6]], index=[1]) + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("indexer", [True, (True,)]) @pytest.mark.parametrize("dtype", [bool, "boolean"]) def test_loc_bool_multiindex(self, dtype, indexer): diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index b4457611c83e8..4efc14be3eb7e 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -373,14 +373,6 @@ def test_getitem_bool_int_key(): ser.loc[0] -def test_iloc_ea_series(): - # GH#49521 - df = DataFrame([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]) - result = df.iloc[Series([1], dtype="Int64"), Series([0, 1], dtype="Int64")] - expected = DataFrame([[5, 6]], index=[1]) - tm.assert_frame_equal(result, expected) - - class TestDeprecatedIndexers: @pytest.mark.parametrize("key", [{1}, {1: 1}]) def test_getitem_dict_and_set_deprecated(self, key): From ca68a199d14c889e620145943d12d9b84112c63d Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 11 Dec 2022 14:57:37 +0100 Subject: [PATCH 3/3] Add test --- pandas/core/indexing.py | 2 ++ pandas/tests/frame/indexing/test_indexing.py | 17 ++++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 586458f77757a..9bcbedccba1e6 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1483,6 +1483,8 @@ def _validate_key(self, key, axis: AxisInt): elif is_list_like_indexer(key): if isinstance(key, ABCSeries): arr = key._values + elif is_array_like(key): + arr = key else: arr = np.array(key) len_axis = len(self.obj._get_axis(axis)) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index b881988317a9d..5ce8800de1f67 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1440,10 +1440,25 @@ def test_loc_rhs_empty_warning(self): def test_iloc_ea_series_indexer(self): # GH#49521 df = DataFrame([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]) - result = df.iloc[Series([1], dtype="Int64"), Series([0, 1], dtype="Int64")] + indexer = Series([0, 1], dtype="Int64") + row_indexer = Series([1], dtype="Int64") + result = df.iloc[row_indexer, indexer] expected = DataFrame([[5, 6]], index=[1]) tm.assert_frame_equal(result, expected) + result = df.iloc[row_indexer.values, indexer.values] + tm.assert_frame_equal(result, expected) + + def test_iloc_ea_series_indexer_with_na(self): + # GH#49521 + df = DataFrame([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]) + indexer = Series([0, pd.NA], dtype="Int64") + msg = "cannot convert" + with pytest.raises(ValueError, match=msg): + df.iloc[:, indexer] + with pytest.raises(ValueError, match=msg): + df.iloc[:, indexer.values] + @pytest.mark.parametrize("indexer", [True, (True,)]) @pytest.mark.parametrize("dtype", [bool, "boolean"]) def test_loc_bool_multiindex(self, dtype, indexer):