diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index d8dc8ae68c347..f83aa0ce87d3b 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -217,6 +217,8 @@ Categorical - Added test to assert the :func:`fillna` raises the correct ValueError message when the value isn't a value from categories (:issue:`13628`) - Bug in :meth:`Categorical.astype` where ``NaN`` values were handled incorrectly when casting to int (:issue:`28406`) +- Bug in :meth:`Categorical.astype` not allowing for casting to extension dtypes (:issue:`28668`) +- Bug where :func:`merge` was unable to join on categorical and extension dtype columns (:issue:`28668`) - :meth:`Categorical.searchsorted` and :meth:`CategoricalIndex.searchsorted` now work on unordered categoricals also (:issue:`21667`) - Added test to assert roundtripping to parquet with :func:`DataFrame.to_parquet` or :func:`read_parquet` will preserve Categorical dtypes for string types (:issue:`27955`) - diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 33d1de01fa3db..b7431033dae59 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -57,7 +57,7 @@ ) from pandas.core.base import NoNewAttributesMixin, PandasObject, _shared_docs import pandas.core.common as com -from pandas.core.construction import extract_array, sanitize_array +from pandas.core.construction import array, extract_array, sanitize_array from pandas.core.missing import interpolate_2d from pandas.core.sorting import nargsort @@ -520,6 +520,8 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: if dtype == self.dtype: return self return self._set_dtype(dtype) + if is_extension_array_dtype(dtype): + return array(self, dtype=dtype, copy=copy) # type: ignore # GH 28770 if is_integer_dtype(dtype) and self.isna().any(): msg = "Cannot convert float NaN to integer" raise ValueError(msg) diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index c342777b0ebc4..e70e4f2fe501b 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -207,6 +207,22 @@ def test_cast_nan_to_int(self, cls, values): with pytest.raises((ValueError, TypeError), match=msg): s.astype(int) + @pytest.mark.parametrize( + "expected", + [ + pd.Series(["2019", "2020"], dtype="datetime64[ns, UTC]"), + pd.Series([0, 0], dtype="timedelta64[ns]"), + pd.Series([pd.Period("2019"), pd.Period("2020")], dtype="period[A-DEC]"), + pd.Series([pd.Interval(0, 1), pd.Interval(1, 2)], dtype="interval"), + pd.Series([1, np.nan], dtype="Int64"), + ], + ) + def test_cast_category_to_extension_dtype(self, expected): + # GH 28668 + result = expected.astype("category").astype(expected.dtype) + + tm.assert_series_equal(result, expected) + class TestArithmeticOps(base.BaseArithmeticOpsTests): def test_arith_series_with_scalar(self, data, all_arithmetic_operators): diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 63de9777756cc..4de8bba169438 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2096,6 +2096,20 @@ def test_merge_equal_cat_dtypes2(): tm.assert_frame_equal(result, expected, check_categorical=False) +def test_merge_on_cat_and_ext_array(): + # GH 28668 + right = DataFrame( + {"a": Series([pd.Interval(0, 1), pd.Interval(1, 2)], dtype="interval")} + ) + left = right.copy() + left["a"] = left["a"].astype("category") + + result = pd.merge(left, right, how="inner", on="a") + expected = right.copy() + + assert_frame_equal(result, expected) + + def test_merge_multiindex_columns(): # Issue #28518 # Verify that merging two dataframes give the expected labels