diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index bbdcd183f65e1..7ff39525bf80d 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -652,7 +652,7 @@ Strings
 ^^^^^^^
 - Bug in :meth:`Series.to_string`, :meth:`DataFrame.to_string`, and :meth:`DataFrame.to_latex` adding a leading space when ``index=False`` (:issue:`24980`)
 - Bug in :func:`to_numeric` raising a ``TypeError`` when attempting to convert a string dtype Series containing only numeric strings and ``NA`` (:issue:`37262`)
--
+- Bug in :meth:`astype` raising ``ValueError`` when attempting to convert ``string`` dtype to ``timedelta64`` (:issue:`38509`)
 
 Interval
 ^^^^^^^^
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index cc2013deb5252..4ca8f0e0c879b 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -21,6 +21,7 @@
 from pandas.core.arrays import FloatingArray, IntegerArray, PandasArray
 from pandas.core.arrays.floating import FloatingDtype
 from pandas.core.arrays.integer import _IntegerDtype
+from pandas.core.arrays.timedeltas import sequence_to_td64ns
 from pandas.core.construction import extract_array
 from pandas.core.indexers import check_array_indexer
 from pandas.core.missing import isna
@@ -308,6 +309,13 @@ def astype(self, dtype, copy=True):
             values = arr.astype(dtype)
             values[mask] = np.nan
             return values
+        elif np.issubdtype(dtype, np.timedelta64):
+            # GH 38509: handle conversion to timedelta64
+            arr = self.copy()
+            mask = self.isna()
+            arr[mask] = "NaT"
+            values, _ = sequence_to_td64ns(arr)
+            return values
 
         return super().astype(dtype, copy)
 
diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index 5365929213503..a3b3dec7cda0d 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -5,7 +5,11 @@
 
 import pandas.util._test_decorators as td
 
-from pandas.core.dtypes.common import is_dtype_equal
+from pandas.core.dtypes.common import (
+    is_datetime64_dtype,
+    is_dtype_equal,
+    is_timedelta64_dtype,
+)
 
 import pandas as pd
 import pandas._testing as tm
@@ -123,24 +127,31 @@ def test_string_methods(input, method, dtype, request):
     tm.assert_series_equal(result.astype(object), expected)
 
 
-def test_astype_roundtrip(dtype, request):
+@pytest.mark.parametrize(
+    "input",
+    [
+        pd.date_range("2000", periods=12),
+        pd.timedelta_range("1 D", periods=3),
+    ],
+)
+def test_astype_roundtrip(input, dtype, request):
+    # GH 38509 (timedelta)
     if dtype == "arrow_string":
-        reason = "ValueError: Could not convert object to NumPy datetime"
-        mark = pytest.mark.xfail(reason=reason, raises=ValueError)
-        request.node.add_marker(mark)
-    else:
-        mark = pytest.mark.xfail(
-            reason="GH#36153 casting from StringArray to dt64 fails", raises=ValueError
-        )
+        if is_datetime64_dtype(input):
+            typename = "datetime"
+        elif is_timedelta64_dtype(input):
+            typename = "timedelta"
+        reason = f"ValueError: Could not convert object to NumPy {typename}"
+        mark = pytest.mark.xfail(reason=reason)
         request.node.add_marker(mark)
 
-    ser = pd.Series(pd.date_range("2000", periods=12))
+    ser = pd.Series(input)
     ser[0] = None
 
     casted = ser.astype(dtype)
     assert is_dtype_equal(casted.dtype, dtype)
 
-    result = casted.astype("datetime64[ns]")
+    result = casted.astype(ser.dtype)
     tm.assert_series_equal(result, ser)
 
 