From 40c0791c0ab55dbb25157ed70e85f858a7976b2d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 5 Apr 2023 16:41:53 -0700 Subject: [PATCH 1/2] BUG: to_timedelta/datetime with numeric ArrowExtensionArray --- doc/source/whatsnew/v2.0.1.rst | 2 +- pandas/core/arrays/datetimelike.py | 9 +++++++-- pandas/tests/tools/test_to_datetime.py | 14 ++++++++++++++ pandas/tests/tools/test_to_timedelta.py | 14 ++++++++++++++ 4 files changed, 36 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v2.0.1.rst b/doc/source/whatsnew/v2.0.1.rst index 0122c84ba2a8e..202804eecf3e9 100644 --- a/doc/source/whatsnew/v2.0.1.rst +++ b/doc/source/whatsnew/v2.0.1.rst @@ -20,7 +20,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- +- Bug in :func:`to_datetime` and :func:`to_timedelta` when trying to convert numeric data with an :class:`ArrowDtype` (:issue:`52425`) .. --------------------------------------------------------------------------- .. _whatsnew_201.other: diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 162906200882b..b0ff9cf3fbeea 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -127,6 +127,7 @@ NDArrayBackedExtensionArray, ravel_compat, ) +from pandas.core.arrays.arrow.array import ArrowExtensionArray from pandas.core.arrays.base import ExtensionArray from pandas.core.arrays.integer import IntegerArray import pandas.core.common as com @@ -2209,10 +2210,14 @@ def ensure_arraylike_for_datetimelike(data, copy: bool, cls_name: str): else: data = extract_array(data, extract_numpy=True) - if isinstance(data, IntegerArray): + if isinstance(data, IntegerArray) or ( + isinstance(data, ArrowExtensionArray) and data.dtype.kind in "iu" + ): data = data.to_numpy("int64", na_value=iNaT) copy = False - elif not isinstance(data, (np.ndarray, ExtensionArray)): + elif not isinstance(data, (np.ndarray, ExtensionArray)) or isinstance( + data, ArrowExtensionArray
+ ): # GH#24539 e.g. xarray, dask object data = np.asarray(data) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 7d3aaf7fd3744..e5f2eab2daac9 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -22,6 +22,7 @@ iNaT, parsing, ) +from pandas.compat.pyarrow import pa_version_under7p0 from pandas.errors import ( OutOfBoundsDatetime, OutOfBoundsTimedelta, @@ -3586,3 +3587,16 @@ def test_ignoring_unknown_tz_deprecated(): with tm.assert_produces_warning(FutureWarning): res = to_datetime([dtstr]) tm.assert_index_equal(res, to_datetime([dtstr[:-5]])) + + +@pytest.mark.skipif(pa_version_under7p0, reason="Requires Pyarrow") +@pytest.mark.parametrize( + "pa_str_dtype", + tm.ALL_INT_PYARROW_DTYPES_STR_REPR + tm.FLOAT_PYARROW_DTYPES_STR_REPR, +) +def test_from_numeric_arrow_dtype(pa_str_dtype): + # GH 52425 + ser = Series([1, 2], dtype=pa_str_dtype) + result = to_datetime(ser) + expected = Series([1, 2], dtype="datetime64[ns]") + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py index b27a0db4dfe98..65f99f0cdc460 100644 --- a/pandas/tests/tools/test_to_timedelta.py +++ b/pandas/tests/tools/test_to_timedelta.py @@ -6,6 +6,7 @@ import numpy as np import pytest +from pandas.compat.pyarrow import pa_version_under7p0 from pandas.errors import OutOfBoundsTimedelta import pandas as pd @@ -287,3 +288,16 @@ def test_to_timedelta_numeric_ea(self, any_numeric_ea_dtype): result = to_timedelta(ser) expected = Series([pd.Timedelta(1, unit="ns"), pd.NaT]) tm.assert_series_equal(result, expected) + + +@pytest.mark.skipif(pa_version_under7p0, reason="Requires Pyarrow") +@pytest.mark.parametrize( + "pa_str_dtype", + tm.ALL_INT_PYARROW_DTYPES_STR_REPR + tm.FLOAT_PYARROW_DTYPES_STR_REPR, +) +def test_from_numeric_arrow_dtype(pa_str_dtype): + # GH 52425 + ser = Series([1, 2], dtype=pa_str_dtype) + result = 
to_timedelta(ser) + expected = Series([1, 2], dtype="timedelta64[ns]") + tm.assert_series_equal(result, expected) From 34197ad0b43b3af82fe2b97e4d41d66a7f1e1c1d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 5 Apr 2023 17:25:07 -0700 Subject: [PATCH 2/2] Test compat --- pandas/tests/tools/test_to_datetime.py | 11 +++-------- pandas/tests/tools/test_to_timedelta.py | 11 +++-------- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index e5f2eab2daac9..7b707be97c653 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -22,7 +22,6 @@ iNaT, parsing, ) -from pandas.compat.pyarrow import pa_version_under7p0 from pandas.errors import ( OutOfBoundsDatetime, OutOfBoundsTimedelta, @@ -3589,14 +3588,10 @@ def test_ignoring_unknown_tz_deprecated(): tm.assert_index_equal(res, to_datetime([dtstr[:-5]])) -@pytest.mark.skipif(pa_version_under7p0, reason="Requires Pyarrow") -@pytest.mark.parametrize( - "pa_str_dtype", - tm.ALL_INT_PYARROW_DTYPES_STR_REPR + tm.FLOAT_PYARROW_DTYPES_STR_REPR, -) -def test_from_numeric_arrow_dtype(pa_str_dtype): +def test_from_numeric_arrow_dtype(any_numeric_ea_dtype): # GH 52425 - ser = Series([1, 2], dtype=pa_str_dtype) + pytest.importorskip("pyarrow") + ser = Series([1, 2], dtype=f"{any_numeric_ea_dtype.lower()}[pyarrow]") result = to_datetime(ser) expected = Series([1, 2], dtype="datetime64[ns]") tm.assert_series_equal(result, expected) diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py index 65f99f0cdc460..b1ab449996685 100644 --- a/pandas/tests/tools/test_to_timedelta.py +++ b/pandas/tests/tools/test_to_timedelta.py @@ -6,7 +6,6 @@ import numpy as np import pytest -from pandas.compat.pyarrow import pa_version_under7p0 from pandas.errors import OutOfBoundsTimedelta import pandas as pd @@ -290,14 +289,10 @@ def 
test_to_timedelta_numeric_ea(self, any_numeric_ea_dtype): tm.assert_series_equal(result, expected) -@pytest.mark.skipif(pa_version_under7p0, reason="Requires Pyarrow") -@pytest.mark.parametrize( - "pa_str_dtype", - tm.ALL_INT_PYARROW_DTYPES_STR_REPR + tm.FLOAT_PYARROW_DTYPES_STR_REPR, -) -def test_from_numeric_arrow_dtype(pa_str_dtype): +def test_from_numeric_arrow_dtype(any_numeric_ea_dtype): # GH 52425 - ser = Series([1, 2], dtype=pa_str_dtype) + pytest.importorskip("pyarrow") + ser = Series([1, 2], dtype=f"{any_numeric_ea_dtype.lower()}[pyarrow]") result = to_timedelta(ser) expected = Series([1, 2], dtype="timedelta64[ns]") tm.assert_series_equal(result, expected)