|
31 | 31 | pa_version_under7p0,
|
32 | 32 | pa_version_under8p0,
|
33 | 33 | pa_version_under9p0,
|
| 34 | + pa_version_under11p0, |
34 | 35 | )
|
35 | 36 | from pandas.util._decorators import doc
|
36 | 37 | from pandas.util._validators import validate_fillna_kwargs
|
@@ -134,6 +135,16 @@ def floordiv_compat(
|
134 | 135 | ArrowExtensionArrayT = TypeVar("ArrowExtensionArrayT", bound="ArrowExtensionArray")
|
135 | 136 |
|
136 | 137 |
|
def get_unit_from_pa_dtype(pa_dtype):
    """
    Return the time unit of a pyarrow temporal dtype as a string.

    Parameters
    ----------
    pa_dtype : pyarrow.DataType
        A unit-carrying dtype, e.g. ``time32``/``time64``, ``duration`` or
        ``timestamp``.

    Returns
    -------
    str
        On the string-parsing path, one of ``"s"``, ``"ms"``, ``"us"``,
        ``"ns"``; otherwise whatever ``pa_dtype.unit`` holds.

    Raises
    ------
    ValueError
        If, on the string-parsing path, no recognised unit can be extracted
        from ``str(pa_dtype)``.
    """
    # https://github.com/pandas-dev/pandas/pull/50998#discussion_r1100344804
    if not pa_version_under11p0:
        return pa_dtype.unit

    # With pyarrow < 11 the unit is recovered from the dtype's string form
    # instead of the ``unit`` attribute (see the discussion linked above).
    # The unit is the first bracketed parameter, so any further parameters
    # are ignored: "time64[us]" -> "us", "timestamp[ns, tz=UTC]" -> "ns".
    _, _, params = str(pa_dtype).partition("[")
    unit = params.rstrip("]").split(",", 1)[0].strip()
    if unit not in ("s", "ms", "us", "ns"):
        raise ValueError(pa_dtype)
    return unit
| 146 | + |
| 147 | + |
137 | 148 | def to_pyarrow_type(
|
138 | 149 | dtype: ArrowDtype | pa.DataType | Dtype | None,
|
139 | 150 | ) -> pa.DataType | None:
|
@@ -1043,6 +1054,13 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
|
1043 | 1054 | elif name in ["min", "max", "sum"] and pa.types.is_duration(pa_type):
|
1044 | 1055 | data_to_reduce = self._data.cast(pa.int64())
|
1045 | 1056 |
|
| 1057 | + elif name in ["median", "mean", "std", "sem"] and pa.types.is_temporal(pa_type): |
| 1058 | + nbits = pa_type.bit_width |
| 1059 | + if nbits == 32: |
| 1060 | + data_to_reduce = self._data.cast(pa.int32()) |
| 1061 | + else: |
| 1062 | + data_to_reduce = self._data.cast(pa.int64()) |
| 1063 | + |
1046 | 1064 | if name == "sem":
|
1047 | 1065 |
|
1048 | 1066 | def pyarrow_meth(data, skip_nulls, **kwargs):
|
@@ -1080,6 +1098,22 @@ def pyarrow_meth(data, skip_nulls, **kwargs):
|
1080 | 1098 |
|
1081 | 1099 | if name in ["min", "max", "sum"] and pa.types.is_duration(pa_type):
|
1082 | 1100 | result = result.cast(pa_type)
|
| 1101 | + if name in ["median", "mean"] and pa.types.is_temporal(pa_type): |
| 1102 | + result = result.cast(pa_type) |
| 1103 | + if name in ["std", "sem"] and pa.types.is_temporal(pa_type): |
| 1104 | + result = result.cast(pa.int64()) |
| 1105 | + if pa.types.is_duration(pa_type): |
| 1106 | + result = result.cast(pa_type) |
| 1107 | + elif pa.types.is_time(pa_type): |
| 1108 | + unit = get_unit_from_pa_dtype(pa_type) |
| 1109 | + result = result.cast(pa.duration(unit)) |
| 1110 | + elif pa.types.is_date(pa_type): |
| 1111 | + # go with closest available unit, i.e. "s" |
| 1112 | + result = result.cast(pa.duration("s")) |
| 1113 | + else: |
| 1114 | + # i.e. timestamp |
| 1115 | + result = result.cast(pa.duration(pa_type.unit)) |
| 1116 | + |
1083 | 1117 | return result.as_py()
|
1084 | 1118 |
|
1085 | 1119 | def __setitem__(self, key, value) -> None:
|
|
0 commit comments