Skip to content

Commit 48f1a67

Browse files
dsaxton authored and jreback committed
BUG: Allow cast from cat to extension dtype (#28762)
1 parent a1b2c4b commit 48f1a67

File tree

4 files changed

+35
-1
lines changed

4 files changed

+35
-1
lines changed

doc/source/whatsnew/v1.0.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,8 @@ Categorical
219219

220220
- Added test to assert the :func:`fillna` raises the correct ValueError message when the value isn't a value from categories (:issue:`13628`)
221221
- Bug in :meth:`Categorical.astype` where ``NaN`` values were handled incorrectly when casting to int (:issue:`28406`)
222+
- Bug in :meth:`Categorical.astype` not allowing for casting to extension dtypes (:issue:`28668`)
223+
- Bug where :func:`merge` was unable to join on categorical and extension dtype columns (:issue:`28668`)
222224
- :meth:`Categorical.searchsorted` and :meth:`CategoricalIndex.searchsorted` now work on unordered categoricals also (:issue:`21667`)
223225
- Added test to assert roundtripping to parquet with :func:`DataFrame.to_parquet` or :func:`read_parquet` will preserve Categorical dtypes for string types (:issue:`27955`)
224226
-

pandas/core/arrays/categorical.py

+3 -1
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@
5757
)
5858
from pandas.core.base import NoNewAttributesMixin, PandasObject, _shared_docs
5959
import pandas.core.common as com
60-
from pandas.core.construction import extract_array, sanitize_array
60+
from pandas.core.construction import array, extract_array, sanitize_array
6161
from pandas.core.missing import interpolate_2d
6262
from pandas.core.sorting import nargsort
6363

@@ -520,6 +520,8 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
520520
if dtype == self.dtype:
521521
return self
522522
return self._set_dtype(dtype)
523+
if is_extension_array_dtype(dtype):
524+
return array(self, dtype=dtype, copy=copy) # type: ignore # GH 28770
523525
if is_integer_dtype(dtype) and self.isna().any():
524526
msg = "Cannot convert float NaN to integer"
525527
raise ValueError(msg)

pandas/tests/extension/test_categorical.py

+16
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,22 @@ def test_cast_nan_to_int(self, cls, values):
207207
with pytest.raises((ValueError, TypeError), match=msg):
208208
s.astype(int)
209209

210+
@pytest.mark.parametrize(
211+
"expected",
212+
[
213+
pd.Series(["2019", "2020"], dtype="datetime64[ns, UTC]"),
214+
pd.Series([0, 0], dtype="timedelta64[ns]"),
215+
pd.Series([pd.Period("2019"), pd.Period("2020")], dtype="period[A-DEC]"),
216+
pd.Series([pd.Interval(0, 1), pd.Interval(1, 2)], dtype="interval"),
217+
pd.Series([1, np.nan], dtype="Int64"),
218+
],
219+
)
220+
def test_cast_category_to_extension_dtype(self, expected):
221+
# GH 28668
222+
result = expected.astype("category").astype(expected.dtype)
223+
224+
tm.assert_series_equal(result, expected)
225+
210226

211227
class TestArithmeticOps(base.BaseArithmeticOpsTests):
212228
def test_arith_series_with_scalar(self, data, all_arithmetic_operators):

pandas/tests/reshape/merge/test_merge.py

+14
Original file line numberDiff line numberDiff line change
@@ -2096,6 +2096,20 @@ def test_merge_equal_cat_dtypes2():
20962096
tm.assert_frame_equal(result, expected, check_categorical=False)
20972097

20982098

2099+
def test_merge_on_cat_and_ext_array():
2100+
# GH 28668
2101+
right = DataFrame(
2102+
{"a": Series([pd.Interval(0, 1), pd.Interval(1, 2)], dtype="interval")}
2103+
)
2104+
left = right.copy()
2105+
left["a"] = left["a"].astype("category")
2106+
2107+
result = pd.merge(left, right, how="inner", on="a")
2108+
expected = right.copy()
2109+
2110+
assert_frame_equal(result, expected)
2111+
2112+
20992113
def test_merge_multiindex_columns():
21002114
# Issue #28518
21012115
# Verify that merging two dataframes give the expected labels

0 commit comments

Comments
 (0)