From 6b9b2103425014c1c3e900546572030029bc0232 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Wed, 14 Jun 2023 18:12:15 -0400 Subject: [PATCH] BUG: astype('category') on dataframe backed by non-writeable arrays raises ValueError --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/_libs/hashtable_class_helper.pxi.in | 2 +- pandas/tests/extension/test_categorical.py | 9 +++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 42b1346696bb8..17975bd57657b 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -341,6 +341,7 @@ Bug fixes Categorical ^^^^^^^^^^^ +- Bug in :meth:`Series.astype` with ``dtype="category"`` for nullable arrays with read-only null value masks (:issue:`53658`) - Bug in :meth:`Series.map` , where the value of the ``na_action`` parameter was not used if the series held a :class:`Categorical` (:issue:`22527`). - diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index a3a1cdf374bb1..1cf5d734705af 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -648,7 +648,7 @@ cdef class {{name}}HashTable(HashTable): UInt8Vector result_mask UInt8VectorData *rmd bint use_na_value, use_mask, seen_na = False - uint8_t[:] mask_values + const uint8_t[:] mask_values if return_inverse: labels = np.empty(n, dtype=np.intp) diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index f331449489bcc..91ca358ca0709 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -319,3 +319,12 @@ def test_repr_2d(self, data): res = repr(data.reshape(-1, 1)) assert res.count("\nCategories") == 1 + + +def test_astype_category_readonly_mask_values(): + # GH 53658 + df = pd.DataFrame([0, 1, 2], dtype="Int64") + df._mgr.arrays[0]._mask.flags["WRITEABLE"] = False + result = df.astype("category") + expected = pd.DataFrame([0, 1, 2], dtype="Int64").astype("category") + tm.assert_frame_equal(result, expected)