Skip to content

Commit d2f05c2

Browse files
authored
ENH: Implement interchange protocol for DatetimeTZDtype (#54246)
* ENH: Implement interchange protocol for DatetimeTZDtype * Add type ignores * Add comment
1 parent f49cd3b commit d2f05c2

File tree

5 files changed

+36
-10
lines changed

5 files changed

+36
-10
lines changed

doc/source/whatsnew/v2.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ Other enhancements
149149
- Adding ``engine_kwargs`` parameter to :func:`read_excel` (:issue:`52214`)
150150
- Classes that are useful for type-hinting have been added to the public API in the new submodule ``pandas.api.typing`` (:issue:`48577`)
151151
- Implemented :attr:`Series.dt.is_month_start`, :attr:`Series.dt.is_month_end`, :attr:`Series.dt.is_year_start`, :attr:`Series.dt.is_year_end`, :attr:`Series.dt.is_quarter_start`, :attr:`Series.dt.is_quarter_end`, :attr:`Series.dt.days_in_month`, :attr:`Series.dt.unit`, :meth:`Series.dt.normalize`, :meth:`Series.dt.day_name`, :meth:`Series.dt.month_name`, :meth:`Series.dt.tz_convert` for :class:`ArrowDtype` with ``pyarrow.timestamp`` (:issue:`52388`, :issue:`51718`)
152+
- Implemented :func:`api.interchange.from_dataframe` for :class:`DatetimeTZDtype` (:issue:`54239`)
152153
- Implemented ``__from_arrow__`` on :class:`DatetimeTZDtype`. (:issue:`52201`)
153154
- Implemented ``__pandas_priority__`` to allow custom types to take precedence over :class:`DataFrame`, :class:`Series`, :class:`Index`, or :class:`ExtensionArray` for arithmetic operations, :ref:`see the developer guide <extending.pandas_priority>` (:issue:`48347`)
154155
- Improve error message when having incompatible columns using :meth:`DataFrame.merge` (:issue:`51861`)

pandas/core/interchange/column.py

+13-2
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,10 @@
99
from pandas.errors import NoBufferPresent
1010
from pandas.util._decorators import cache_readonly
1111

12-
from pandas.core.dtypes.dtypes import ArrowDtype
12+
from pandas.core.dtypes.dtypes import (
13+
ArrowDtype,
14+
DatetimeTZDtype,
15+
)
1316

1417
import pandas as pd
1518
from pandas.api.types import is_string_dtype
@@ -138,6 +141,8 @@ def _dtype_from_pandasdtype(self, dtype) -> tuple[DtypeKind, int, str, str]:
138141
raise ValueError(f"Data type {dtype} not supported by interchange protocol")
139142
if isinstance(dtype, ArrowDtype):
140143
byteorder = dtype.numpy_dtype.byteorder
144+
elif isinstance(dtype, DatetimeTZDtype):
145+
byteorder = dtype.base.byteorder # type: ignore[union-attr]
141146
else:
142147
byteorder = dtype.byteorder
143148

@@ -269,7 +274,13 @@ def _get_data_buffer(
269274
DtypeKind.BOOL,
270275
DtypeKind.DATETIME,
271276
):
272-
buffer = PandasBuffer(self._col.to_numpy(), allow_copy=self._allow_copy)
277+
# self.dtype[2] is an ArrowCTypes.TIMESTAMP where the tz will make
278+
# it longer than 4 characters
279+
if self.dtype[0] == DtypeKind.DATETIME and len(self.dtype[2]) > 4:
280+
np_arr = self._col.dt.tz_convert(None).to_numpy()
281+
else:
282+
np_arr = self._col.to_numpy()
283+
buffer = PandasBuffer(np_arr, allow_copy=self._allow_copy)
273284
dtype = self.dtype
274285
elif self.dtype[0] == DtypeKind.CATEGORICAL:
275286
codes = self._col.values._codes

pandas/core/interchange/from_dataframe.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -325,20 +325,20 @@ def string_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:
325325
return np.asarray(str_list, dtype="object"), buffers
326326

327327

328-
def parse_datetime_format_str(format_str, data):
328+
def parse_datetime_format_str(format_str, data) -> pd.Series | np.ndarray:
329329
"""Parse datetime `format_str` to interpret the `data`."""
330330
# timestamp 'ts{unit}:tz'
331331
timestamp_meta = re.match(r"ts([smun]):(.*)", format_str)
332332
if timestamp_meta:
333333
unit, tz = timestamp_meta.group(1), timestamp_meta.group(2)
334-
if tz != "":
335-
raise NotImplementedError("Timezones are not supported yet")
336334
if unit != "s":
337335
# the format string describes only a first letter of the unit, so
338336
# add one extra letter to convert the unit to numpy-style:
339337
# 'm' -> 'ms', 'u' -> 'us', 'n' -> 'ns'
340338
unit += "s"
341339
data = data.astype(f"datetime64[{unit}]")
340+
if tz != "":
341+
data = pd.Series(data).dt.tz_localize("UTC").dt.tz_convert(tz)
342342
return data
343343

344344
# date 'td{Days/Ms}'
@@ -358,7 +358,7 @@ def parse_datetime_format_str(format_str, data):
358358
raise NotImplementedError(f"DateTime kind is not supported: {format_str}")
359359

360360

361-
def datetime_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:
361+
def datetime_column_to_ndarray(col: Column) -> tuple[np.ndarray | pd.Series, Any]:
362362
"""
363363
Convert a column holding DateTime data to a NumPy array.
364364
@@ -389,7 +389,7 @@ def datetime_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:
389389
length=col.size(),
390390
)
391391

392-
data = parse_datetime_format_str(format_str, data)
392+
data = parse_datetime_format_str(format_str, data) # type: ignore[assignment]
393393
data = set_nulls(data, col, buffers["validity"])
394394
return data, buffers
395395

pandas/core/interchange/utils.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
from __future__ import annotations
66

7-
import re
87
import typing
98

109
import numpy as np
@@ -14,6 +13,7 @@
1413
from pandas.core.dtypes.dtypes import (
1514
ArrowDtype,
1615
CategoricalDtype,
16+
DatetimeTZDtype,
1717
)
1818

1919
if typing.TYPE_CHECKING:
@@ -134,10 +134,13 @@ def dtype_to_arrow_c_fmt(dtype: DtypeObj) -> str:
134134

135135
if lib.is_np_dtype(dtype, "M"):
136136
# Selecting the first char of resolution string:
137-
# dtype.str -> '<M8[ns]'
138-
resolution = re.findall(r"\[(.*)\]", dtype.str)[0][:1]
137+
# dtype.str -> '<M8[ns]' -> 'n'
138+
resolution = np.datetime_data(dtype)[0][0]
139139
return ArrowCTypes.TIMESTAMP.format(resolution=resolution, tz="")
140140

141+
elif isinstance(dtype, DatetimeTZDtype):
142+
return ArrowCTypes.TIMESTAMP.format(resolution=dtype.unit[0], tz=dtype.tz)
143+
141144
raise NotImplementedError(
142145
f"Conversion of {dtype} to Arrow C format string is not implemented."
143146
)

pandas/tests/interchange/test_impl.py

+11
Original file line numberDiff line numberDiff line change
@@ -284,3 +284,14 @@ def test_empty_pyarrow(data):
284284
arrow_df = pa_from_dataframe(expected)
285285
result = from_dataframe(arrow_df)
286286
tm.assert_frame_equal(result, expected)
287+
288+
289+
@pytest.mark.parametrize("tz", ["UTC", "US/Pacific"])
290+
@pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
291+
def test_datetimetzdtype(tz, unit):
292+
# GH 54239
293+
tz_data = (
294+
pd.date_range("2018-01-01", periods=5, freq="D").tz_localize(tz).as_unit(unit)
295+
)
296+
df = pd.DataFrame({"ts_tz": tz_data})
297+
tm.assert_frame_equal(df, from_dataframe(df.__dataframe__()))

0 commit comments

Comments
 (0)