Skip to content

Commit cee3583

Browse files
jbrockmendel authored and meeseeksmachine committed
Backport PR pandas-dev#43597: BUG: .cat changing dtype inplace
1 parent 5ba7252 commit cee3583

File tree

4 files changed

+51
-3
lines changed

4 files changed

+51
-3
lines changed

doc/source/whatsnew/v1.3.4.rst

+2
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ Fixed regressions
1616
~~~~~~~~~~~~~~~~~
1717
- Fixed regression in :meth:`merge` with integer and ``NaN`` keys failing with ``outer`` merge (:issue:`43550`)
1818
- Fixed performance regression in :meth:`MultiIndex.equals` (:issue:`43549`)
19+
- Fixed regression in :meth:`Series.cat.reorder_categories` failing to update the categories on the ``Series`` (:issue:`43232`)
20+
- Fixed regression in :meth:`Series.cat.categories` setter failing to update the categories on the ``Series`` (:issue:`43334`)
1921
-
2022

2123
.. ---------------------------------------------------------------------------

pandas/core/arrays/categorical.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
cache_readonly,
4545
deprecate_kwarg,
4646
)
47+
from pandas.util._exceptions import find_stack_level
4748
from pandas.util._validators import validate_bool_kwarg
4849

4950
from pandas.core.dtypes.cast import (
@@ -1096,10 +1097,10 @@ def reorder_categories(self, new_categories, ordered=None, inplace=no_default):
10961097
warn(
10971098
"The `inplace` parameter in pandas.Categorical."
10981099
"reorder_categories is deprecated and will be removed in "
1099-
"a future version. Removing unused categories will always "
1100+
"a future version. Reordering categories will always "
11001101
"return a new Categorical object.",
11011102
FutureWarning,
1102-
stacklevel=2,
1103+
stacklevel=find_stack_level(),
11031104
)
11041105
else:
11051106
inplace = False

pandas/core/internals/blocks.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,6 @@ def getitem_block_columns(self, slicer, new_mgr_locs: BlockPlacement) -> Block:
341341
def shape(self) -> Shape:
342342
return self.values.shape
343343

344-
@final
345344
@cache_readonly
346345
def dtype(self) -> DtypeObj:
347346
return self.values.dtype
@@ -1845,6 +1844,12 @@ class CategoricalBlock(ExtensionBlock):
18451844
# this Block type is kept for backwards-compatibility
18461845
__slots__ = ()
18471846

1847+
# GH#43232, GH#43334 self.values.dtype can be changed inplace until 2.0,
1848+
# so this cannot be cached
1849+
@property
1850+
def dtype(self) -> DtypeObj:
1851+
return self.values.dtype
1852+
18481853

18491854
# -----------------------------------------------------------------
18501855
# Constructor Helpers

pandas/tests/series/accessors/test_cat_accessor.py

+40
Original file line numberDiff line numberDiff line change
@@ -249,3 +249,43 @@ def test_dt_accessor_api_for_categorical(self):
249249
with pytest.raises(AttributeError, match=msg):
250250
invalid.dt
251251
assert not hasattr(invalid, "str")
252+
253+
def test_reorder_categories_updates_dtype(self):
254+
# GH#43232
255+
ser = Series(["a", "b", "c"], dtype="category")
256+
orig_dtype = ser.dtype
257+
258+
# Need to construct this before calling reorder_categories inplace
259+
expected = ser.cat.reorder_categories(["c", "b", "a"])
260+
261+
with tm.assert_produces_warning(FutureWarning, match="`inplace` parameter"):
262+
ser.cat.reorder_categories(["c", "b", "a"], inplace=True)
263+
264+
assert not orig_dtype.categories.equals(ser.dtype.categories)
265+
assert not orig_dtype.categories.equals(expected.dtype.categories)
266+
assert ser.dtype == expected.dtype
267+
assert ser.dtype.categories.equals(expected.dtype.categories)
268+
269+
tm.assert_series_equal(ser, expected)
270+
271+
def test_set_categories_setitem(self):
272+
# GH#43334
273+
274+
df = DataFrame({"Survived": [1, 0, 1], "Sex": [0, 1, 1]}, dtype="category")
275+
276+
# change the dtype in-place
277+
df["Survived"].cat.categories = ["No", "Yes"]
278+
df["Sex"].cat.categories = ["female", "male"]
279+
280+
# values should not be coerced to NaN
281+
assert list(df["Sex"]) == ["female", "male", "male"]
282+
assert list(df["Survived"]) == ["Yes", "No", "Yes"]
283+
284+
df["Sex"] = Categorical(df["Sex"], categories=["female", "male"], ordered=False)
285+
df["Survived"] = Categorical(
286+
df["Survived"], categories=["No", "Yes"], ordered=False
287+
)
288+
289+
# values should not be coerced to NaN
290+
assert list(df["Sex"]) == ["female", "male", "male"]
291+
assert list(df["Survived"]) == ["Yes", "No", "Yes"]

0 commit comments

Comments
 (0)