diff --git a/doc/source/whatsnew/v1.2.5.rst b/doc/source/whatsnew/v1.2.5.rst index d0af23b48b1f7..5b8b5eb9e651c 100644 --- a/doc/source/whatsnew/v1.2.5.rst +++ b/doc/source/whatsnew/v1.2.5.rst @@ -19,6 +19,7 @@ Fixed regressions - Regression in :func:`read_csv` when using ``memory_map=True`` with an non-UTF8 encoding (:issue:`40986`) - Regression in :meth:`DataFrame.replace` and :meth:`Series.replace` when the values to replace is a NumPy float array (:issue:`40371`) - Regression in :func:`ExcelFile` when a corrupt file is opened but not closed (:issue:`41778`) +- Fixed regression in :meth:`DataFrame.astype` with ``dtype=str`` failing to convert ``NaN`` in categorical columns (:issue:`41797`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index ecc45357db8c1..3fdb52a73dc3e 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -26,6 +26,7 @@ NaT, algos as libalgos, hashtable as htable, + lib, ) from pandas._libs.arrays import NDArrayBacked from pandas._libs.lib import no_default @@ -523,6 +524,7 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: try: new_cats = np.asarray(self.categories) new_cats = new_cats.astype(dtype=dtype, copy=copy) + fill_value = lib.item_from_zerodim(np.array(np.nan).astype(dtype)) except ( TypeError, # downstream error msg for CategoricalIndex is misleading ValueError, @@ -530,7 +532,9 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: msg = f"Cannot cast {self.categories.dtype} dtype to {dtype}" raise ValueError(msg) - result = take_nd(new_cats, ensure_platform_int(self._codes)) + result = take_nd( + new_cats, ensure_platform_int(self._codes), fill_value=fill_value + ) return result diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index bd71f0a0716b4..f098582ca04c6 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -698,3 +698,11 @@ def test_categorical_astype_to_int(self, any_int_or_nullable_int_dtype): {"col1": pd.array([2, 1, 3], dtype=any_int_or_nullable_int_dtype)} ) tm.assert_frame_equal(df, expected) + + def test_astype_categorical_to_string_missing(self): + # https://github.com/pandas-dev/pandas/issues/41797 + df = DataFrame(["a", "b", np.nan]) + expected = df.astype(str) + cat = df.astype("category") + result = cat.astype(str) + tm.assert_frame_equal(result, expected)