Skip to content

Commit b751288

Browse files
jbrockmendel authored and pooja-subramaniam committed
ENH: support any/all for pyarrow numeric and duration dtypes (pandas-dev#50717)
* ENH: support any/all for pyarrow numeric and duration dtypes
* mypy fixup
* use pc.not_equal
* use suggested pattern
* fix not_eq
1 parent 0fdb55b commit b751288

File tree

2 files changed

+37
-2
lines changed

2 files changed

+37
-2
lines changed

pandas/core/arrays/arrow/array.py

+22-1
Original file line number | Diff line number | Diff line change
@@ -1012,6 +1012,26 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
10121012
------
10131013
TypeError : subclass does not define reductions
10141014
"""
1015+
pa_type = self._data.type
1016+
1017+
data_to_reduce = self._data
1018+
1019+
if name in ["any", "all"] and (
1020+
pa.types.is_integer(pa_type)
1021+
or pa.types.is_floating(pa_type)
1022+
or pa.types.is_duration(pa_type)
1023+
):
1024+
# pyarrow only supports any/all for boolean dtype, we allow
1025+
# for other dtypes, matching our non-pyarrow behavior
1026+
1027+
if pa.types.is_duration(pa_type):
1028+
data_to_cmp = self._data.cast(pa.int64())
1029+
else:
1030+
data_to_cmp = self._data
1031+
1032+
not_eq = pc.not_equal(data_to_cmp, 0)
1033+
data_to_reduce = not_eq
1034+
10151035
if name == "sem":
10161036

10171037
def pyarrow_meth(data, skip_nulls, **kwargs):
@@ -1033,8 +1053,9 @@ def pyarrow_meth(data, skip_nulls, **kwargs):
10331053
if pyarrow_meth is None:
10341054
# Let ExtensionArray._reduce raise the TypeError
10351055
return super()._reduce(name, skipna=skipna, **kwargs)
1056+
10361057
try:
1037-
result = pyarrow_meth(self._data, skip_nulls=skipna, **kwargs)
1058+
result = pyarrow_meth(data_to_reduce, skip_nulls=skipna, **kwargs)
10381059
except (AttributeError, NotImplementedError, TypeError) as err:
10391060
msg = (
10401061
f"'{type(self).__name__}' with dtype {self.dtype} "

pandas/tests/extension/test_arrow.py

+15-1
Original file line number | Diff line number | Diff line change
@@ -566,10 +566,24 @@ def test_reduce_series(
566566
f"pyarrow={pa.__version__} for {pa_dtype}"
567567
),
568568
)
569-
if not pa.types.is_boolean(pa_dtype):
569+
if pa.types.is_string(pa_dtype) or pa.types.is_binary(pa_dtype):
570+
# We *might* want to make this behave like the non-pyarrow cases,
571+
# but have not yet decided.
570572
request.node.add_marker(xfail_mark)
573+
571574
op_name = all_boolean_reductions
572575
ser = pd.Series(data)
576+
577+
if pa.types.is_temporal(pa_dtype) and not pa.types.is_duration(pa_dtype):
578+
# xref GH#34479 we support this in our non-pyarrow datetime64 dtypes,
579+
# but it isn't obvious we _should_. For now, we keep the pyarrow
580+
# behavior which does not support this.
581+
582+
with pytest.raises(TypeError, match="does not support reduction"):
583+
getattr(ser, op_name)(skipna=skipna)
584+
585+
return
586+
573587
result = getattr(ser, op_name)(skipna=skipna)
574588
assert result is (op_name == "any")
575589

0 commit comments

Comments
 (0)