From 3d1cb464461a58fa000bc012d0cc089cb48f1215 Mon Sep 17 00:00:00 2001 From: Anurag Varma Date: Mon, 17 Feb 2025 19:03:54 -0500 Subject: [PATCH 1/4] modified the files according to bug#60237 --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/dtypes/cast.py | 2 +- pandas/tests/extension/test_arrow.py | 24 ++++++++++++++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 4d9a45abe17cd..d2efde6c013c8 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -668,6 +668,7 @@ Conversion - Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`) - Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`) - Bug in :meth:`Series.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`) +- Bug in :meth:`convert_dtypes` not preserving timezone details for ArrowDtype in Series and DataFrame (:issue:`60237`) Strings ^^^^^^^ diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 94531c2ac87e8..f11aefeeaaa00 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1113,7 +1113,7 @@ def convert_dtypes( else: inferred_dtype = input_array.dtype - if dtype_backend == "pyarrow": + if dtype_backend == "pyarrow" and not isinstance(inferred_dtype, ArrowDtype): from pandas.core.arrays.arrow.array import to_pyarrow_type from pandas.core.arrays.string_ import StringDtype diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index d6f428f4938a6..0561ae00871cc 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -3511,3 +3511,27 @@ def test_map_numeric_na_action(): result = ser.map(lambda x: 42, na_action="ignore") expected = pd.Series([42.0, 42.0, np.nan], dtype="float64") tm.assert_series_equal(result, expected) + + +def test_convert_dtype_pyarrow_timezone_preserve(): + # GH 60237 + pytest.importorskip("pyarrow") + ser = pd.Series( + pd.to_datetime(range(5), utc=True, unit="h"), + dtype="timestamp[ns, tz=UTC][pyarrow]", + ) + result = ser.convert_dtypes(dtype_backend="pyarrow") + expected = ser.copy() + tm.assert_series_equal(result, expected) + + df = pd.DataFrame( + { + "timestamps": pd.Series( + pd.to_datetime(range(5), utc=True, unit="h"), + dtype="timestamp[ns, tz=UTC][pyarrow]", + ) + } + ) + result = df.convert_dtypes(dtype_backend="pyarrow") + expected = df.copy() + tm.assert_frame_equal(result, expected) From 3d40bcaf3b7694ca9b9361ad51bbc10c577441db Mon Sep 17 00:00:00 2001 From: Anurag Varma Date: Tue, 18 Feb 2025 12:33:34 -0500 Subject: [PATCH 2/4] Update doc/source/whatsnew/v3.0.0.rst Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index ecab5632f59a1..9d6c00ef1dc6f 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -671,7 +671,7 @@ Conversion - Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`) - Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`) - Bug in :meth:`Series.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`) -- Bug in :meth:`convert_dtypes` not preserving timezone details for ArrowDtype in Series and DataFrame (:issue:`60237`) +- Bug in :meth:`Series.convert_dtypes` and :meth:`DataFrame.convert_dtypes` removing timezone information for objects with :class:`ArrowDtype` (:issue:`60237`) Strings ^^^^^^^ From 43e97c5ca3f2048f98bec739e27ac50d69519370 Mon Sep 17 00:00:00 2001 From: Anurag Varma Date: Tue, 18 Feb 2025 13:02:27 -0500 Subject: [PATCH 3/4] moved test case to frame and serier folders --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/tests/extension/test_arrow.py | 24 ------------------- .../frame/methods/test_convert_dtypes.py | 14 +++++++++++ .../series/methods/test_convert_dtypes.py | 11 +++++++++ 4 files changed, 26 insertions(+), 25 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 9d6c00ef1dc6f..7cd5759af4989 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -670,8 +670,8 @@ Conversion - Bug in :meth:`DataFrame.astype` not casting ``values`` for Arrow-based dictionary dtype correctly (:issue:`58479`) - Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`) - Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`) -- Bug in :meth:`Series.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`) - Bug in :meth:`Series.convert_dtypes` and :meth:`DataFrame.convert_dtypes` removing timezone information for objects with :class:`ArrowDtype` (:issue:`60237`) +- Bug in :meth:`Series.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`) Strings ^^^^^^^ diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 0561ae00871cc..d6f428f4938a6 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -3511,27 +3511,3 @@ def test_map_numeric_na_action(): result = ser.map(lambda x: 42, na_action="ignore") expected = pd.Series([42.0, 42.0, np.nan], dtype="float64") tm.assert_series_equal(result, expected) - - -def test_convert_dtype_pyarrow_timezone_preserve(): - # GH 60237 - pytest.importorskip("pyarrow") - ser = pd.Series( - pd.to_datetime(range(5), utc=True, unit="h"), - dtype="timestamp[ns, tz=UTC][pyarrow]", - ) - result = ser.convert_dtypes(dtype_backend="pyarrow") - expected = ser.copy() - tm.assert_series_equal(result, expected) - - df = pd.DataFrame( - { - "timestamps": pd.Series( - pd.to_datetime(range(5), utc=True, unit="h"), - dtype="timestamp[ns, tz=UTC][pyarrow]", - ) - } - ) - result = df.convert_dtypes(dtype_backend="pyarrow") - expected = df.copy() - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py index e7f6e5d625d3e..4e99444eced42 100644 --- a/pandas/tests/frame/methods/test_convert_dtypes.py +++ b/pandas/tests/frame/methods/test_convert_dtypes.py @@ -196,3 +196,17 @@ def test_convert_dtypes_from_arrow(self): result = df.convert_dtypes() expected = df.astype({"a": "string[python]"}) tm.assert_frame_equal(result, expected) + + def test_convert_dtype_pyarrow_timezone_preserve(self): + # GH 60237 + df = pd.DataFrame( + { + "timestamps": pd.Series( + pd.to_datetime(range(5), utc=True, unit="h"), + dtype="timestamp[ns, tz=UTC][pyarrow]", + ) + } + ) + result = df.convert_dtypes(dtype_backend="pyarrow") + expected = df.copy() + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index 90c4056a39e84..d373386108ff6 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -297,3 +297,14 @@ def test_convert_dtypes_pyarrow_null(self): result = ser.convert_dtypes(dtype_backend="pyarrow") expected = pd.Series([None, None], dtype=pd.ArrowDtype(pa.null())) tm.assert_series_equal(result, expected) + + def test_convert_dtype_pyarrow_timezone_preserve(self): + # GH 60237 + pytest.importorskip("pyarrow") + ser = pd.Series( + pd.to_datetime(range(5), utc=True, unit="h"), + dtype="timestamp[ns, tz=UTC][pyarrow]", + ) + result = ser.convert_dtypes(dtype_backend="pyarrow") + expected = ser.copy() + tm.assert_series_equal(result, expected) From 0b47d24ef03af73c264c332851e9370bfbd5ab98 Mon Sep 17 00:00:00 2001 From: Anurag Varma Date: Tue, 18 Feb 2025 13:50:35 -0500 Subject: [PATCH 4/4] fix pyarrow import error --- pandas/tests/frame/methods/test_convert_dtypes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py index 4e99444eced42..d0f30204758d3 100644 --- a/pandas/tests/frame/methods/test_convert_dtypes.py +++ b/pandas/tests/frame/methods/test_convert_dtypes.py @@ -199,6 +199,7 @@ def test_convert_dtypes_from_arrow(self): def test_convert_dtype_pyarrow_timezone_preserve(self): # GH 60237 + pytest.importorskip("pyarrow") df = pd.DataFrame( { "timestamps": pd.Series(