From fa5a1009a3ad2cc8f9af0a7eff92bd75a8ab984b Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 5 Dec 2023 13:29:12 -1000
Subject: [PATCH] Backport PR #56294: BUG: Series(strings, dtype=ArrowDtype[timestamp]) raising

---
 doc/source/whatsnew/v2.1.4.rst       |  1 +
 pandas/core/arrays/arrow/array.py    |  2 +-
 pandas/tests/extension/test_arrow.py | 10 ++++++++++
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v2.1.4.rst b/doc/source/whatsnew/v2.1.4.rst
index 3caeef3d26ead..362ee81bbdc09 100644
--- a/doc/source/whatsnew/v2.1.4.rst
+++ b/doc/source/whatsnew/v2.1.4.rst
@@ -22,6 +22,7 @@ Fixed regressions
 Bug fixes
 ~~~~~~~~~
 - Bug in :class:`Series` constructor raising DeprecationWarning when ``index`` is a list of :class:`Series` (:issue:`55228`)
+- Bug in :class:`Series` when trying to cast date-like string inputs to :class:`ArrowDtype` of ``pyarrow.timestamp`` (:issue:`56266`)
 - Bug in :meth:`DataFrame.apply` where passing ``raw=True`` ignored ``args`` passed to the applied function (:issue:`55753`)
 - Bug in :meth:`Index.__getitem__` returning wrong result for Arrow dtypes and negative stepsize (:issue:`55832`)
 - Fixed bug in :func:`to_numeric` converting to extension dtype for ``string[pyarrow_numpy]`` dtype (:issue:`56179`)
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index a08f3ba44f417..bd94f7b0de956 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -462,7 +462,7 @@ def _box_pa_array(
 
             try:
                 pa_array = pa.array(value, type=pa_type, from_pandas=True)
-            except pa.ArrowInvalid:
+            except (pa.ArrowInvalid, pa.ArrowTypeError):
                 # GH50430: let pyarrow infer type, then cast
                 pa_array = pa.array(value, from_pandas=True)
 
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 61474aa94d1c8..44e3a99a65de7 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -3100,3 +3100,13 @@ def test_arrow_floordiv():
     expected = pd.Series([-2], dtype="int64[pyarrow]")
     result = a // b
     tm.assert_series_equal(result, expected)
+
+
+def test_string_to_datetime_parsing_cast():
+    # GH 56266
+    string_dates = ["2020-01-01 04:30:00", "2020-01-02 00:00:00", "2020-01-03 00:00:00"]
+    result = pd.Series(string_dates, dtype="timestamp[ns][pyarrow]")
+    expected = pd.Series(
+        ArrowExtensionArray(pa.array(pd.to_datetime(string_dates), from_pandas=True))
+    )
+    tm.assert_series_equal(result, expected)