From 76ba0cc6002bbd0a0be086f4132b43caef8a2cbe Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 1 Dec 2023 16:47:22 -0800 Subject: [PATCH] BUG: Series(strings, dtype=ArrowDtype[timestamp]) raising --- doc/source/whatsnew/v2.1.4.rst | 1 + pandas/core/arrays/arrow/array.py | 2 +- pandas/tests/extension/test_arrow.py | 10 ++++++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.4.rst b/doc/source/whatsnew/v2.1.4.rst index 9e3eb90436642..a18f4c60a1980 100644 --- a/doc/source/whatsnew/v2.1.4.rst +++ b/doc/source/whatsnew/v2.1.4.rst @@ -22,6 +22,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ - Bug in :class:`Series` constructor raising DeprecationWarning when ``index`` is a list of :class:`Series` (:issue:`55228`) +- Bug in :class:`Series` when trying to cast date-like string inputs to :class:`ArrowDtype` of ``pyarrow.timestamp`` (:issue:`56266`) - Bug in :meth:`Index.__getitem__` returning wrong result for Arrow dtypes and negative stepsize (:issue:`55832`) - Fixed bug in :func:`to_numeric` converting to extension dtype for ``string[pyarrow_numpy]`` dtype (:issue:`56179`) - Fixed bug in :meth:`DataFrame.__setitem__` casting :class:`Index` with object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index d162b66e5d369..a035d68386add 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -463,7 +463,7 @@ def _box_pa_array( try: pa_array = pa.array(value, type=pa_type, from_pandas=True) - except pa.ArrowInvalid: + except (pa.ArrowInvalid, pa.ArrowTypeError): # GH50430: let pyarrow infer type, then cast pa_array = pa.array(value, from_pandas=True) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 7131a50956a7d..38e7a86c80a78 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -2959,3 +2959,13 @@ def test_arrow_floordiv(): expected = pd.Series([-2], dtype="int64[pyarrow]") result = a // b tm.assert_series_equal(result, expected) + + +def test_string_to_datetime_parsing_cast(): + # GH 56266 + string_dates = ["2020-01-01 04:30:00", "2020-01-02 00:00:00", "2020-01-03 00:00:00"] + result = pd.Series(string_dates, dtype="timestamp[ns][pyarrow]") + expected = pd.Series( + ArrowExtensionArray(pa.array(pd.to_datetime(string_dates), from_pandas=True)) + ) + tm.assert_series_equal(result, expected)