Skip to content

Commit ac0d34b

Browse files
Backport PR pandas-dev#42087: REGR: undocumented astype("category").astype(str) type inconsistency between pandas 1.1 & 1.2
1 parent 5c5482a commit ac0d34b

File tree

3 files changed

+16
-2
lines changed

3 files changed

+16
-2
lines changed

doc/source/whatsnew/v1.2.5.rst

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ Fixed regressions
1919
- Regression in :func:`read_csv` when using ``memory_map=True`` with a non-UTF8 encoding (:issue:`40986`)
2020
- Regression in :meth:`DataFrame.replace` and :meth:`Series.replace` when the values to replace are given as a NumPy float array (:issue:`40371`)
2121
- Regression in :class:`ExcelFile` when a corrupt file is opened but not closed (:issue:`41778`)
22+
- Fixed regression in :meth:`DataFrame.astype` with ``dtype=str`` failing to convert ``NaN`` in categorical columns (:issue:`41797`)
2223

2324
.. ---------------------------------------------------------------------------
2425

pandas/core/arrays/categorical.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from pandas._config import get_option
1111

12-
from pandas._libs import NaT, algos as libalgos, hashtable as htable
12+
from pandas._libs import NaT, algos as libalgos, hashtable as htable, lib
1313
from pandas._libs.lib import no_default
1414
from pandas._typing import ArrayLike, Dtype, Ordered, Scalar
1515
from pandas.compat.numpy import function as nv
@@ -429,14 +429,19 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
429429
try:
430430
new_cats = np.asarray(self.categories)
431431
new_cats = new_cats.astype(dtype=dtype, copy=copy)
432+
fill_value = lib.item_from_zerodim(np.array(np.nan).astype(dtype))
432433
except (
433434
TypeError, # downstream error msg for CategoricalIndex is misleading
434435
ValueError,
435436
):
436437
msg = f"Cannot cast {self.categories.dtype} dtype to {dtype}"
437438
raise ValueError(msg)
438439

439-
result = take_1d(new_cats, libalgos.ensure_platform_int(self._codes))
440+
result = take_1d(
441+
new_cats,
442+
libalgos.ensure_platform_int(self._codes),
443+
fill_value=fill_value,
444+
)
440445

441446
return result
442447

pandas/tests/frame/methods/test_astype.py

+8
Original file line numberDiff line numberDiff line change
@@ -616,3 +616,11 @@ def test_astype_bytes(self):
616616
# GH#39474
617617
result = DataFrame(["foo", "bar", "baz"]).astype(bytes)
618618
assert result.dtypes[0] == np.dtype("S3")
619+
620+
def test_astype_categorical_to_string_missing(self):
621+
# https://github.com/pandas-dev/pandas/issues/41797
622+
df = DataFrame(["a", "b", np.nan])
623+
expected = df.astype(str)
624+
cat = df.astype("category")
625+
result = cat.astype(str)
626+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)