From 5f6e0e38acf7d604ca6091ac374d0f3831775fd4 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 13 Dec 2023 11:33:27 -0800 Subject: [PATCH 1/2] BUG: time strings cast to ArrowDtype with pa.time64 type --- doc/source/whatsnew/v2.2.0.rst | 2 +- pandas/core/arrays/arrow/array.py | 16 +++++++++++++++- pandas/tests/extension/test_arrow.py | 11 +++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 8209525721b98..eca238223b8f0 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -697,7 +697,7 @@ Other - Bug in rendering ``inf`` values inside a a :class:`DataFrame` with the ``use_inf_as_na`` option enabled (:issue:`55483`) - Bug in rendering a :class:`Series` with a :class:`MultiIndex` when one of the index level's names is 0 not having that name displayed (:issue:`55415`) - Bug in the error message when assigning an empty dataframe to a column (:issue:`55956`) -- +- Bug when time-like strings were being cast to :class:`ArrowDtype` with ``pyarrow.time64`` type (:issue:`56463`) .. ***DO NOT USE THIS SECTION*** diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 9f0a039126d1d..ca7ff576bb009 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -493,7 +493,21 @@ def _box_pa_array( if pa.types.is_dictionary(pa_type): pa_array = pa_array.dictionary_encode() else: - pa_array = pa_array.cast(pa_type) + try: + pa_array = pa_array.cast(pa_type) + except ( + pa.ArrowInvalid, + pa.ArrowTypeError, + pa.ArrowNotImplementedError, + ): + if pa.types.is_string(pa_array.type) or pa.types.is_large_string( + pa_array.type + ): + return cls._from_sequence_of_strings( + value, dtype=pa_type + )._pa_array + else: + raise return pa_array diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 47cd3a51f664b..2f98e172025ce 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -59,6 +59,7 @@ is_string_dtype, is_unsigned_integer_dtype, ) +from pandas.core.tools.times import to_time from pandas.tests.extension import base pa = pytest.importorskip("pyarrow") @@ -3051,3 +3052,13 @@ def test_string_to_datetime_parsing_cast(): ArrowExtensionArray(pa.array(pd.to_datetime(string_dates), from_pandas=True)) ) tm.assert_series_equal(result, expected) + + +def test_string_to_time_parsing_cast(): + # GH 56463 + string_times = ["11:41:43.076160"] + result = pd.Series(string_times, dtype="time64[us][pyarrow]") + expected = pd.Series( + ArrowExtensionArray(pa.array(to_time(string_times), from_pandas=True)) + ) + tm.assert_series_equal(result, expected) From a78f6d1e29bbe9bf5b3030d54167c53ce4066f40 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 14 Dec 2023 08:32:27 -0800 Subject: [PATCH 2/2] use time object in test, add comment about moving logic --- pandas/core/arrays/arrow/array.py | 2 ++ pandas/tests/extension/test_arrow.py | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index ca7ff576bb009..f514559625701 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -503,6 +503,8 @@ def _box_pa_array( if pa.types.is_string(pa_array.type) or pa.types.is_large_string( pa_array.type ): + # TODO: Move logic in _from_sequence_of_strings into + # _box_pa_array return cls._from_sequence_of_strings( value, dtype=pa_type )._pa_array diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 2f98e172025ce..18f8494db5ae4 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -59,7 +59,6 @@ is_string_dtype, is_unsigned_integer_dtype, ) -from pandas.core.tools.times import to_time from pandas.tests.extension import base pa = pytest.importorskip("pyarrow") @@ -3059,6 +3058,6 @@ def test_string_to_time_parsing_cast(): string_times = ["11:41:43.076160"] result = pd.Series(string_times, dtype="time64[us][pyarrow]") expected = pd.Series( - ArrowExtensionArray(pa.array(to_time(string_times), from_pandas=True)) + ArrowExtensionArray(pa.array([time(11, 41, 43, 76160)], from_pandas=True)) ) tm.assert_series_equal(result, expected)