Skip to content

Commit 5cb0916

Browse files
authored
Fix StringArray.astype for category dtype (#40450)
1 parent cc84a23 commit 5cb0916

File tree

3 files changed

+36
-0
lines changed

3 files changed

+36
-0
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -572,6 +572,7 @@ Conversion
572572
- Bug in creating a :class:`DataFrame` from an empty ``np.recarray`` not retaining the original dtypes (:issue:`40121`)
573573
- Bug in :class:`DataFrame` failing to raise ``TypeError`` when constructing from a ``frozenset`` (:issue:`40163`)
574574
- Bug in :class:`Index` construction silently ignoring a passed ``dtype`` when the data cannot be cast to that dtype (:issue:`21311`)
575+
- Bug in :meth:`StringArray.astype` falling back to numpy and raising when converting to ``dtype='category'`` (:issue:`40450`)
575576
- Bug in :class:`DataFrame` construction with a dictionary containing an arraylike with ``ExtensionDtype`` and ``copy=True`` failing to make a copy (:issue:`38939`)
576577
-
577578

pandas/core/arrays/string_.py

+3
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,9 @@ def astype(self, dtype, copy=True):
327327
arr[mask] = "0"
328328
values = arr.astype(dtype.numpy_dtype)
329329
return FloatingArray(values, mask, copy=False)
330+
elif isinstance(dtype, ExtensionDtype):
331+
cls = dtype.construct_array_type()
332+
return cls._from_sequence(self, dtype=dtype, copy=copy)
330333
elif np.issubdtype(dtype, np.floating):
331334
arr = self._ndarray.copy()
332335
mask = self.isna()

pandas/tests/series/methods/test_astype.py

+32
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,38 @@ def test_astype_bytes(self):
350350
assert result.dtypes == np.dtype("S3")
351351

352352

353+
class TestAstypeString:
354+
@pytest.mark.parametrize(
355+
"data, dtype",
356+
[
357+
([True, NA], "boolean"),
358+
(["A", NA], "category"),
359+
(["2020-10-10", "2020-10-10"], "datetime64[ns]"),
360+
(["2020-10-10", "2020-10-10", NaT], "datetime64[ns]"),
361+
(
362+
["2012-01-01 00:00:00-05:00", NaT],
363+
"datetime64[ns, US/Eastern]",
364+
),
365+
([1, None], "UInt16"),
366+
(["1/1/2021", "2/1/2021"], "period[M]"),
367+
(["1/1/2021", "2/1/2021", NaT], "period[M]"),
368+
(["1 Day", "59 Days", NaT], "timedelta64[ns]"),
369+
# currently no way to parse IntervalArray from a list of strings
370+
],
371+
)
372+
def test_astype_string_to_extension_dtype_roundtrip(self, data, dtype, request):
373+
if dtype == "boolean" or (
374+
dtype in ("period[M]", "datetime64[ns]", "timedelta64[ns]") and NaT in data
375+
):
376+
mark = pytest.mark.xfail(
377+
reason="TODO StringArray.astype() with missing values #GH40566"
378+
)
379+
request.node.add_marker(mark)
380+
# GH-40351
381+
s = Series(data, dtype=dtype)
382+
tm.assert_series_equal(s, s.astype("string").astype(dtype))
383+
384+
353385
class TestAstypeCategorical:
354386
def test_astype_categorical_to_other(self):
355387
cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)])

0 commit comments

Comments
 (0)