Skip to content

Commit 52306d9

Browse files
authored
ENH: support reductions for pyarrow temporal types (#50998)
* ENH: support reductions for pyarrow temporal types
* unit check
* lint fixup
1 parent 9e52b49 commit 52306d9

File tree

4 files changed

+38
-7
lines changed

4 files changed

+38
-7
lines changed

pandas/compat/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
pa_version_under7p0,
3131
pa_version_under8p0,
3232
pa_version_under9p0,
33+
pa_version_under11p0,
3334
)
3435

3536

@@ -159,6 +160,7 @@ def get_lzma_file() -> type[pandas.compat.compressors.LZMAFile]:
159160
"pa_version_under7p0",
160161
"pa_version_under8p0",
161162
"pa_version_under9p0",
163+
"pa_version_under11p0",
162164
"IS64",
163165
"PY39",
164166
"PY310",

pandas/compat/pyarrow.py

+2
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@
1313
pa_version_under8p0 = _palv < Version("8.0.0")
1414
pa_version_under9p0 = _palv < Version("9.0.0")
1515
pa_version_under10p0 = _palv < Version("10.0.0")
16+
pa_version_under11p0 = _palv < Version("11.0.0")
1617
except ImportError:
1718
pa_version_under7p0 = True
1819
pa_version_under8p0 = True
1920
pa_version_under9p0 = True
2021
pa_version_under10p0 = True
22+
pa_version_under11p0 = True

pandas/core/arrays/arrow/array.py

+34
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
pa_version_under7p0,
3232
pa_version_under8p0,
3333
pa_version_under9p0,
34+
pa_version_under11p0,
3435
)
3536
from pandas.util._decorators import doc
3637
from pandas.util._validators import validate_fillna_kwargs
@@ -134,6 +135,16 @@ def floordiv_compat(
134135
ArrowExtensionArrayT = TypeVar("ArrowExtensionArrayT", bound="ArrowExtensionArray")
135136

136137

138+
def get_unit_from_pa_dtype(pa_dtype):
    """
    Return the time-unit string of a pyarrow temporal data type.

    Parameters
    ----------
    pa_dtype : pyarrow.DataType
        Type whose unit is wanted.

    Returns
    -------
    str
        ``pa_dtype.unit`` when ``pa_version_under11p0`` is false; otherwise
        the unit parsed from ``str(pa_dtype)``, one of "s", "ms", "us", "ns".

    Raises
    ------
    ValueError
        On the string-parsing path, when no recognised unit can be found in
        ``str(pa_dtype)``.
    """
    # https://github.com/pandas-dev/pandas/pull/50998#discussion_r1100344804
    if pa_version_under11p0:
        # NOTE(review): presumably ``.unit`` is not available on every
        # temporal type for these pyarrow versions -- see the linked thread.
        # Parse the unit out of the string form, e.g. "time64[ns]" -> "ns".
        # Only the first bracketed field is used, so extra parameters such as
        # "timestamp[ns, tz=UTC]" do not make a valid unit look invalid, and
        # a string with no "[" at all is rejected explicitly.
        _, bracket, params = str(pa_dtype).partition("[")
        unit = params.split(",", 1)[0].rstrip("]").strip() if bracket else ""
        if unit not in ("s", "ms", "us", "ns"):
            raise ValueError(pa_dtype)
        return unit
    return pa_dtype.unit
146+
147+
137148
def to_pyarrow_type(
138149
dtype: ArrowDtype | pa.DataType | Dtype | None,
139150
) -> pa.DataType | None:
@@ -1043,6 +1054,13 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
10431054
elif name in ["min", "max", "sum"] and pa.types.is_duration(pa_type):
10441055
data_to_reduce = self._data.cast(pa.int64())
10451056

1057+
elif name in ["median", "mean", "std", "sem"] and pa.types.is_temporal(pa_type):
1058+
nbits = pa_type.bit_width
1059+
if nbits == 32:
1060+
data_to_reduce = self._data.cast(pa.int32())
1061+
else:
1062+
data_to_reduce = self._data.cast(pa.int64())
1063+
10461064
if name == "sem":
10471065

10481066
def pyarrow_meth(data, skip_nulls, **kwargs):
@@ -1080,6 +1098,22 @@ def pyarrow_meth(data, skip_nulls, **kwargs):
10801098

10811099
if name in ["min", "max", "sum"] and pa.types.is_duration(pa_type):
10821100
result = result.cast(pa_type)
1101+
if name in ["median", "mean"] and pa.types.is_temporal(pa_type):
1102+
result = result.cast(pa_type)
1103+
if name in ["std", "sem"] and pa.types.is_temporal(pa_type):
1104+
result = result.cast(pa.int64())
1105+
if pa.types.is_duration(pa_type):
1106+
result = result.cast(pa_type)
1107+
elif pa.types.is_time(pa_type):
1108+
unit = get_unit_from_pa_dtype(pa_type)
1109+
result = result.cast(pa.duration(unit))
1110+
elif pa.types.is_date(pa_type):
1111+
# go with closest available unit, i.e. "s"
1112+
result = result.cast(pa.duration("s"))
1113+
else:
1114+
# i.e. timestamp
1115+
result = result.cast(pa.duration(pa_type.unit))
1116+
10831117
return result.as_py()
10841118

10851119
def __setitem__(self, key, value) -> None:

pandas/tests/extension/test_arrow.py

-7
Original file line numberDiff line numberDiff line change
@@ -513,13 +513,6 @@ def test_reduce_series(self, data, all_numeric_reductions, skipna, request):
513513
elif all_numeric_reductions == "sem" and pa_version_under8p0:
514514
request.node.add_marker(xfail_mark)
515515

516-
elif all_numeric_reductions in [
517-
"mean",
518-
"median",
519-
"std",
520-
"sem",
521-
] and pa.types.is_temporal(pa_dtype):
522-
request.node.add_marker(xfail_mark)
523516
elif pa.types.is_boolean(pa_dtype) and all_numeric_reductions in {
524517
"sem",
525518
"std",

0 commit comments

Comments
 (0)