From 88d3c7bf559ba99628e55e3d43a35ae9eac8c166 Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 14 Feb 2022 13:16:44 +0100 Subject: [PATCH 1/4] REGR: drop raising with ea index and duplicates --- doc/source/whatsnew/v1.4.2.rst | 2 +- pandas/core/generic.py | 3 +++ pandas/tests/frame/methods/test_drop.py | 12 ++++++++++++ pandas/tests/series/methods/test_drop.py | 14 +++++++++++++- 4 files changed, 29 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.4.2.rst b/doc/source/whatsnew/v1.4.2.rst index 9ba7461b830da..fdac1c385499a 100644 --- a/doc/source/whatsnew/v1.4.2.rst +++ b/doc/source/whatsnew/v1.4.2.rst @@ -14,7 +14,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ -- +- Fixed regression in :meth:`DataFrame.drop` and :meth:`Series.drop` when :class:`Index` had extension dtype and duplicates (:issue:`45860`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b329f4fb00ccc..d4e379e1181a0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4342,6 +4342,9 @@ def _drop_axis( if errors == "raise" and labels_missing: raise KeyError(f"{labels} not found in axis") + if is_extension_array_dtype(mask.dtype): + mask = mask.to_numpy() + indexer = mask.nonzero()[0] new_axis = axis.take(indexer) diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py index 4f8ea6eda1b5f..620aa856b89e4 100644 --- a/pandas/tests/frame/methods/test_drop.py +++ b/pandas/tests/frame/methods/test_drop.py @@ -537,3 +537,15 @@ def test_drop_level_missing_label_multiindex(self): df = DataFrame(index=MultiIndex.from_product([range(3), range(3)])) with pytest.raises(KeyError, match="labels \\[5\\] not found in level"): df.drop(5, level=0) + + @pytest.mark.parametrize("idx, level", [(["a", "b"], 0), (["a"], None)]) + def test_drop_index_ea_dtype(self, any_numeric_ea_dtype, idx, level): + # GH#45860 + df = DataFrame( + 
{"a": [1, 2, 2], "b": 100}, dtype=any_numeric_ea_dtype + ).set_index(idx) + result = df.drop(Index([2]), level=level) + expected = DataFrame( + {"a": [1], "b": 100}, dtype=any_numeric_ea_dtype + ).set_index(idx) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/methods/test_drop.py b/pandas/tests/series/methods/test_drop.py index a625e890393a6..c960c281b2b95 100644 --- a/pandas/tests/series/methods/test_drop.py +++ b/pandas/tests/series/methods/test_drop.py @@ -1,6 +1,9 @@ import pytest -from pandas import Series +from pandas import ( + Index, + Series, +) import pandas._testing as tm @@ -98,3 +101,12 @@ def test_drop_pos_args_deprecation(): result = ser.drop(1, 0) expected = Series([1, 3], index=[0, 2]) tm.assert_series_equal(result, expected) + + +def test_drop_index_ea_dtype(any_numeric_ea_dtype): + # GH#45860 + df = Series(100, index=Index([1, 2, 2], dtype=any_numeric_ea_dtype)) + idx = Index([df.index[1]]) + result = df.drop(idx) + expected = Series(100, index=Index([1], dtype=any_numeric_ea_dtype)) + tm.assert_series_equal(result, expected) From 90bff0d7c555e90e7b7b71c2638efbdbb2a50ee1 Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 14 Feb 2022 17:15:00 +0100 Subject: [PATCH 2/4] Add gh reference and dtype --- pandas/core/generic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d4e379e1181a0..d155ed95ae3eb 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4343,7 +4343,8 @@ def _drop_axis( raise KeyError(f"{labels} not found in axis") if is_extension_array_dtype(mask.dtype): - mask = mask.to_numpy() + # GH#45860 + mask = mask.to_numpy(dtype=bool) indexer = mask.nonzero()[0] new_axis = axis.take(indexer) From 36f7e3db720bcf0503ad1262fa24228f241ff6fa Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 14 Feb 2022 20:42:41 +0100 Subject: [PATCH 3/4] Add NA --- pandas/tests/frame/methods/test_drop.py | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py index 620aa856b89e4..b83fc4c0d7c9c 100644 --- a/pandas/tests/frame/methods/test_drop.py +++ b/pandas/tests/frame/methods/test_drop.py @@ -542,10 +542,10 @@ def test_drop_level_missing_label_multiindex(self): def test_drop_index_ea_dtype(self, any_numeric_ea_dtype, idx, level): # GH#45860 df = DataFrame( - {"a": [1, 2, 2], "b": 100}, dtype=any_numeric_ea_dtype + {"a": [1, 2, 2, pd.NA], "b": 100}, dtype=any_numeric_ea_dtype ).set_index(idx) result = df.drop(Index([2]), level=level) expected = DataFrame( - {"a": [1], "b": 100}, dtype=any_numeric_ea_dtype + {"a": [1, pd.NA], "b": 100}, dtype=any_numeric_ea_dtype ).set_index(idx) tm.assert_frame_equal(result, expected) From 677439f6eabea253e9dd80adaa8d844b59b0d7a6 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 15 Feb 2022 17:10:36 +0100 Subject: [PATCH 4/4] Add NA on both sides --- pandas/tests/frame/methods/test_drop.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py index b83fc4c0d7c9c..50b60f9e06ef1 100644 --- a/pandas/tests/frame/methods/test_drop.py +++ b/pandas/tests/frame/methods/test_drop.py @@ -544,8 +544,8 @@ def test_drop_index_ea_dtype(self, any_numeric_ea_dtype, idx, level): df = DataFrame( {"a": [1, 2, 2, pd.NA], "b": 100}, dtype=any_numeric_ea_dtype ).set_index(idx) - result = df.drop(Index([2]), level=level) + result = df.drop(Index([2, pd.NA]), level=level) expected = DataFrame( - {"a": [1, pd.NA], "b": 100}, dtype=any_numeric_ea_dtype + {"a": [1], "b": 100}, dtype=any_numeric_ea_dtype ).set_index(idx) tm.assert_frame_equal(result, expected)