Skip to content

BUG: .cat changing dtype inplace #43597

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Sep 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.3.4.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ Fixed regressions
~~~~~~~~~~~~~~~~~
- Fixed regression in :meth:`merge` with integer and ``NaN`` keys failing with ``outer`` merge (:issue:`43550`)
- Fixed performance regression in :meth:`MultiIndex.equals` (:issue:`43549`)
- Fixed regression in :meth:`Series.cat.reorder_categories` failing to update the categories on the ``Series`` (:issue:`43232`)
- Fixed regression in :attr:`Series.cat.categories` setter failing to update the categories on the ``Series`` (:issue:`43334`)
-

.. ---------------------------------------------------------------------------
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
cache_readonly,
deprecate_kwarg,
)
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import validate_bool_kwarg

from pandas.core.dtypes.cast import (
Expand Down Expand Up @@ -1116,10 +1117,10 @@ def reorder_categories(self, new_categories, ordered=None, inplace=no_default):
warn(
"The `inplace` parameter in pandas.Categorical."
"reorder_categories is deprecated and will be removed in "
"a future version. Removing unused categories will always "
"a future version. Reordering categories will always "
"return a new Categorical object.",
FutureWarning,
stacklevel=2,
stacklevel=find_stack_level(),
)
else:
inplace = False
Expand Down
7 changes: 6 additions & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,6 @@ def getitem_block_columns(self, slicer, new_mgr_locs: BlockPlacement) -> Block:
def shape(self) -> Shape:
# Shape of this block, read directly from the underlying ``self.values``.
# NOTE(review): any decorators on this accessor sit above the visible hunk.
return self.values.shape

@final
@cache_readonly
def dtype(self) -> DtypeObj:
# Dtype of this block, taken from ``self.values``; the accessor is wrapped
# in ``cache_readonly``.
# NOTE(review): this hunk reports one deleted line and the diff markers are
# lost in this view -- presumably ``@final`` is the removed line, so that
# CategoricalBlock can override ``dtype``; confirm against the real file.
return self.values.dtype
Expand Down Expand Up @@ -1881,6 +1880,12 @@ class CategoricalBlock(ExtensionBlock):
# this Block type is kept for backwards-compatibility
__slots__ = ()

# GH#43232, GH#43334 self.values.dtype can be changed inplace until 2.0,
# so this cannot be cached
@property
def dtype(self) -> DtypeObj:
# Plain (uncached) property: every access re-reads ``self.values.dtype``,
# so an in-place change to the values' dtype is visible on the next access.
return self.values.dtype


# -----------------------------------------------------------------
# Constructor Helpers
Expand Down
40 changes: 40 additions & 0 deletions pandas/tests/series/accessors/test_cat_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,3 +249,43 @@ def test_dt_accessor_api_for_categorical(self):
with pytest.raises(AttributeError, match=msg):
invalid.dt
assert not hasattr(invalid, "str")

def test_reorder_categories_updates_dtype(self):
# Regression test: after ``ser.cat.reorder_categories(..., inplace=True)``
# the dtype seen through ``ser.dtype`` must carry the reordered categories.
# GH#43232
ser = Series(["a", "b", "c"], dtype="category")
# Keep the dtype object from before the reorder for the comparisons below.
orig_dtype = ser.dtype

# Need to construct this before calling reorder_categories inplace
expected = ser.cat.reorder_categories(["c", "b", "a"])

# The inplace call is expected to emit a FutureWarning whose message
# matches "`inplace` parameter".
with tm.assert_produces_warning(FutureWarning, match="`inplace` parameter"):
ser.cat.reorder_categories(["c", "b", "a"], inplace=True)

# The original categories must differ from both the mutated series and the
# non-inplace result ...
assert not orig_dtype.categories.equals(ser.dtype.categories)
assert not orig_dtype.categories.equals(expected.dtype.categories)
# ... while the mutated series must agree with the non-inplace result.
assert ser.dtype == expected.dtype
assert ser.dtype.categories.equals(expected.dtype.categories)

tm.assert_series_equal(ser, expected)

def test_set_categories_setitem(self):
# Regression test: assigning to ``.cat.categories`` on DataFrame columns and
# then re-assigning those columns as Categoricals must keep the renamed
# values rather than turning them into NaN.
# GH#43334

df = DataFrame({"Survived": [1, 0, 1], "Sex": [0, 1, 1]}, dtype="category")

# change the dtype in-place
df["Survived"].cat.categories = ["No", "Yes"]
df["Sex"].cat.categories = ["female", "male"]

# values should not be coerced to NaN
assert list(df["Sex"]) == ["female", "male", "male"]
assert list(df["Survived"]) == ["Yes", "No", "Yes"]

# Re-assign each column as a Categorical built from its current values with
# the same category labels, then repeat the checks.
df["Sex"] = Categorical(df["Sex"], categories=["female", "male"], ordered=False)
df["Survived"] = Categorical(
df["Survived"], categories=["No", "Yes"], ordered=False
)

# values should not be coerced to NaN
assert list(df["Sex"]) == ["female", "male", "male"]
assert list(df["Survived"]) == ["Yes", "No", "Yes"]