Skip to content

Commit 817b25a

Browse files
REGR: undocumented astype("category").astype(str) type inconsistency between pandas 1.1 & 1.2 (#42087)
1 parent 74a1e67 commit 817b25a

File tree

3 files changed

+14
-1
lines changed

3 files changed

+14
-1
lines changed

doc/source/whatsnew/v1.2.5.rst

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ Fixed regressions
1919
- Regression in :func:`read_csv` when using ``memory_map=True`` with a non-UTF8 encoding (:issue:`40986`)
2020
- Regression in :meth:`DataFrame.replace` and :meth:`Series.replace` when the value to replace is a NumPy float array (:issue:`40371`)
2121
- Regression in :class:`ExcelFile` when a corrupt file is opened but not closed (:issue:`41778`)
22+
- Fixed regression in :meth:`DataFrame.astype` with ``dtype=str`` failing to convert ``NaN`` in categorical columns (:issue:`41797`)
2223

2324
.. ---------------------------------------------------------------------------
2425

pandas/core/arrays/categorical.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
NaT,
2727
algos as libalgos,
2828
hashtable as htable,
29+
lib,
2930
)
3031
from pandas._libs.arrays import NDArrayBacked
3132
from pandas._libs.lib import no_default
@@ -523,14 +524,17 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
523524
try:
524525
new_cats = np.asarray(self.categories)
525526
new_cats = new_cats.astype(dtype=dtype, copy=copy)
527+
fill_value = lib.item_from_zerodim(np.array(np.nan).astype(dtype))
526528
except (
527529
TypeError, # downstream error msg for CategoricalIndex is misleading
528530
ValueError,
529531
):
530532
msg = f"Cannot cast {self.categories.dtype} dtype to {dtype}"
531533
raise ValueError(msg)
532534

533-
result = take_nd(new_cats, ensure_platform_int(self._codes))
535+
result = take_nd(
536+
new_cats, ensure_platform_int(self._codes), fill_value=fill_value
537+
)
534538

535539
return result
536540

pandas/tests/frame/methods/test_astype.py

+8
Original file line numberDiff line numberDiff line change
@@ -698,3 +698,11 @@ def test_categorical_astype_to_int(self, any_int_or_nullable_int_dtype):
698698
{"col1": pd.array([2, 1, 3], dtype=any_int_or_nullable_int_dtype)}
699699
)
700700
tm.assert_frame_equal(df, expected)
701+
702+
def test_astype_categorical_to_string_missing(self):
703+
# https://github.com/pandas-dev/pandas/issues/41797
704+
df = DataFrame(["a", "b", np.nan])
705+
expected = df.astype(str)
706+
cat = df.astype("category")
707+
result = cat.astype(str)
708+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)