Skip to content

Commit b1728e9

Browse files
lukemanley authored and topper-123 committed
BUG: convert_dtypes(dtype_backend="pyarrow") losing tz for tz-aware dtypes (pandas-dev#53382)
* BUG: convert_dtypes(dtype_backend="pyarrow") losing tz for tz-aware dtypes
* whatsnew
1 parent 0badde9 commit b1728e9

File tree

4 files changed

+19
-2
lines changed

4 files changed

+19
-2
lines changed

doc/source/whatsnew/v2.0.2.rst

+1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ Bug fixes
3535
- Bug in :func:`to_timedelta` was raising ``ValueError`` with ``pandas.NA`` (:issue:`52909`)
3636
- Bug in :meth:`DataFrame.__getitem__` not preserving dtypes for :class:`MultiIndex` partial keys (:issue:`51895`)
3737
- Bug in :meth:`DataFrame.convert_dtypes` ignoring ``convert_*`` keywords set to ``False`` when ``dtype_backend="pyarrow"`` (:issue:`52872`)
38+
- Bug in :meth:`DataFrame.convert_dtypes` losing timezone for tz-aware dtypes and ``dtype_backend="pyarrow"`` (:issue:`53382`)
3839
- Bug in :meth:`DataFrame.sort_values` raising for PyArrow ``dictionary`` dtype (:issue:`53232`)
3940
- Bug in :meth:`Series.describe` treating pyarrow-backed timestamps and timedeltas as categorical data (:issue:`53001`)
4041
- Bug in :meth:`Series.rename` not making a lazy copy when Copy-on-Write is enabled and a scalar is passed to it (:issue:`52450`)

pandas/core/arrays/arrow/array.py

+3
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
is_object_dtype,
4040
is_scalar,
4141
)
42+
from pandas.core.dtypes.dtypes import DatetimeTZDtype
4243
from pandas.core.dtypes.missing import isna
4344

4445
from pandas.core import roperator
@@ -170,6 +171,8 @@ def to_pyarrow_type(
170171
return dtype.pyarrow_dtype
171172
elif isinstance(dtype, pa.DataType):
172173
return dtype
174+
elif isinstance(dtype, DatetimeTZDtype):
175+
return pa.timestamp(dtype.unit, dtype.tz)
173176
elif dtype:
174177
try:
175178
# Accepts python types too

pandas/core/dtypes/cast.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1097,7 +1097,9 @@ def convert_dtypes(
10971097
and not isinstance(inferred_dtype, StringDtype)
10981098
)
10991099
):
1100-
if isinstance(inferred_dtype, PandasExtensionDtype):
1100+
if isinstance(inferred_dtype, PandasExtensionDtype) and not isinstance(
1101+
inferred_dtype, DatetimeTZDtype
1102+
):
11011103
base_dtype = inferred_dtype.base
11021104
elif isinstance(inferred_dtype, (BaseMaskedDtype, ArrowDtype)):
11031105
base_dtype = inferred_dtype.numpy_dtype

pandas/tests/frame/methods/test_convert_dtypes.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,8 @@ def test_pyarrow_dtype_backend(self):
5353
"c": pd.Series([True, False, None], dtype=np.dtype("O")),
5454
"d": pd.Series([np.nan, 100.5, 200], dtype=np.dtype("float")),
5555
"e": pd.Series(pd.date_range("2022", periods=3)),
56-
"f": pd.Series(pd.timedelta_range("1D", periods=3)),
56+
"f": pd.Series(pd.date_range("2022", periods=3, tz="UTC").as_unit("s")),
57+
"g": pd.Series(pd.timedelta_range("1D", periods=3)),
5758
}
5859
)
5960
result = df.convert_dtypes(dtype_backend="pyarrow")
@@ -76,6 +77,16 @@ def test_pyarrow_dtype_backend(self):
7677
)
7778
),
7879
"f": pd.arrays.ArrowExtensionArray(
80+
pa.array(
81+
[
82+
datetime.datetime(2022, 1, 1),
83+
datetime.datetime(2022, 1, 2),
84+
datetime.datetime(2022, 1, 3),
85+
],
86+
type=pa.timestamp(unit="s", tz="UTC"),
87+
)
88+
),
89+
"g": pd.arrays.ArrowExtensionArray(
7990
pa.array(
8091
[
8192
datetime.timedelta(1),

0 commit comments

Comments
 (0)