Skip to content

Commit 16d51c2

Browse files
authored
Fixed regression in Series.duplicated for categorical dtype with bool categories (#44356) (#44402)
(cherry picked from commit 9f54f70)
1 parent 950fcd7 commit 16d51c2

File tree

4 files changed: +34 -2 lines changed

doc/source/whatsnew/v1.3.5.rst

+1
Original file line number | Diff line number | Diff line change
   |    |   @@ -16,6 +16,7 @@ Fixed regressions
16 | 16 |   ~~~~~~~~~~~~~~~~~
17 | 17 |   - Fixed regression in :meth:`Series.equals` when comparing floats with dtype object to None (:issue:`44190`)
18 | 18 |   - Fixed performance regression in :func:`read_csv` (:issue:`44106`)
   | 19 | + - Fixed regression in :meth:`Series.duplicated` and :meth:`Series.drop_duplicates` when Series has :class:`Categorical` dtype with boolean categories (:issue:`44351`)
19 | 20 |   -
20 | 21 |
21 | 22 |   .. ---------------------------------------------------------------------------

pandas/core/algorithms.py

+1 -1
Original file line number | Diff line number | Diff line change
    |     |   @@ -139,7 +139,7 @@ def _ensure_data(values: ArrayLike) -> tuple[np.ndarray, DtypeObj]:
139 | 139 |               # i.e. all-bool Categorical, BooleanArray
140 | 140 |               try:
141 | 141 |                   return np.asarray(values).astype("uint8", copy=False), values.dtype
142 |     | -             except TypeError:
    | 142 | +             except (TypeError, ValueError):
143 | 143 |                   # GH#42107 we have pd.NAs present
144 | 144 |                   return np.asarray(values), values.dtype
145 | 145 |

pandas/tests/series/methods/test_drop_duplicates.py

+15
Original file line number | Diff line number | Diff line change
    |     |   @@ -2,6 +2,7 @@
  2 |   2 |   import pytest
  3 |   3 |
  4 |   4 |   from pandas import (
    |   5 | +     NA,
  5 |   6 |       Categorical,
  6 |   7 |       Series,
  7 |   8 |   )
    |     |   @@ -224,6 +225,20 @@ def test_drop_duplicates_categorical_bool(self, ordered):
224 | 225 |           assert return_value is None
225 | 226 |           tm.assert_series_equal(sc, tc[~expected])
226 | 227 |
    | 228 | +     def test_drop_duplicates_categorical_bool_na(self):
    | 229 | +         # GH#44351
    | 230 | +         ser = Series(
    | 231 | +             Categorical(
    | 232 | +                 [True, False, True, False, NA], categories=[True, False], ordered=True
    | 233 | +             )
    | 234 | +         )
    | 235 | +         result = ser.drop_duplicates()
    | 236 | +         expected = Series(
    | 237 | +             Categorical([True, False, np.nan], categories=[True, False], ordered=True),
    | 238 | +             index=[0, 1, 4],
    | 239 | +         )
    | 240 | +         tm.assert_series_equal(result, expected)
    | 241 | +
227 | 242 |
228 | 243 |   def test_drop_duplicates_pos_args_deprecation():
229 | 244 |       # GH#41485

pandas/tests/series/methods/test_duplicated.py

+17 -1
Original file line number | Diff line number | Diff line change
   |    |   @@ -1,7 +1,11 @@
 1 |  1 |   import numpy as np
 2 |  2 |   import pytest
 3 |  3 |
 4 |    | - from pandas import Series
   |  4 | + from pandas import (
   |  5 | +     NA,
   |  6 | +     Categorical,
   |  7 | +     Series,
   |  8 | + )
 5 |  9 |   import pandas._testing as tm
 6 | 10 |
 7 | 11 |
   |    |   @@ -33,3 +37,15 @@ def test_duplicated_nan_none(keep, expected):
33 | 37 |
34 | 38 |       result = ser.duplicated(keep=keep)
35 | 39 |       tm.assert_series_equal(result, expected)
   | 40 | +
   | 41 | +
   | 42 | + def test_duplicated_categorical_bool_na():
   | 43 | +     # GH#44351
   | 44 | +     ser = Series(
   | 45 | +         Categorical(
   | 46 | +             [True, False, True, False, NA], categories=[True, False], ordered=True
   | 47 | +         )
   | 48 | +     )
   | 49 | +     result = ser.duplicated()
   | 50 | +     expected = Series([False, False, True, True, False])
   | 51 | +     tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)