diff --git a/doc/source/whatsnew/v1.3.5.rst b/doc/source/whatsnew/v1.3.5.rst index 589092c0dd7e3..951b05b65c81b 100644 --- a/doc/source/whatsnew/v1.3.5.rst +++ b/doc/source/whatsnew/v1.3.5.rst @@ -16,6 +16,7 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed regression in :meth:`Series.equals` when comparing floats with dtype object to None (:issue:`44190`) - Fixed performance regression in :func:`read_csv` (:issue:`44106`) +- Fixed regression in :meth:`Series.duplicated` and :meth:`Series.drop_duplicates` when Series has :class:`Categorical` dtype with boolean categories (:issue:`44351`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index c1b587ce3a6b2..8c2c01b6aedc8 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -148,7 +148,7 @@ def _ensure_data(values: ArrayLike) -> np.ndarray: # i.e. all-bool Categorical, BooleanArray try: return np.asarray(values).astype("uint8", copy=False) - except TypeError: + except (TypeError, ValueError): # GH#42107 we have pd.NAs present return np.asarray(values) diff --git a/pandas/tests/series/methods/test_drop_duplicates.py b/pandas/tests/series/methods/test_drop_duplicates.py index 7eb51f8037792..f72d85337df8e 100644 --- a/pandas/tests/series/methods/test_drop_duplicates.py +++ b/pandas/tests/series/methods/test_drop_duplicates.py @@ -2,6 +2,7 @@ import pytest from pandas import ( + NA, Categorical, Series, ) @@ -224,6 +225,20 @@ def test_drop_duplicates_categorical_bool(self, ordered): assert return_value is None tm.assert_series_equal(sc, tc[~expected]) + def test_drop_duplicates_categorical_bool_na(self): + # GH#44351 + ser = Series( + Categorical( + [True, False, True, False, NA], categories=[True, False], ordered=True + ) + ) + result = ser.drop_duplicates() + expected = Series( + Categorical([True, False, np.nan], categories=[True, False], ordered=True), + index=[0, 1, 4], + ) + tm.assert_series_equal(result, expected) + def test_drop_duplicates_pos_args_deprecation(): # GH#41485 diff --git a/pandas/tests/series/methods/test_duplicated.py b/pandas/tests/series/methods/test_duplicated.py index 5cc297913e851..c61492168da63 100644 --- a/pandas/tests/series/methods/test_duplicated.py +++ b/pandas/tests/series/methods/test_duplicated.py @@ -1,7 +1,11 @@ import numpy as np import pytest -from pandas import Series +from pandas import ( + NA, + Categorical, + Series, +) import pandas._testing as tm @@ -33,3 +37,15 @@ def test_duplicated_nan_none(keep, expected): result = ser.duplicated(keep=keep) tm.assert_series_equal(result, expected) + + +def test_duplicated_categorical_bool_na(): + # GH#44351 + ser = Series( + Categorical( + [True, False, True, False, NA], categories=[True, False], ordered=True + ) + ) + result = ser.duplicated() + expected = Series([False, False, True, True, False]) + tm.assert_series_equal(result, expected)