diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 1bbec97756e79..6250c298f291f 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -853,6 +853,46 @@ def _concat_same_type(
         arr = pa.chunked_array(chunks)
         return cls(arr)
 
+    def _accumulate(
+        self, name: str, *, skipna: bool = True, **kwargs
+    ) -> ArrowExtensionArray | ExtensionArray:
+        """
+        Apply a cumulative operation to the array using a ``pc`` function.
+
+        The dtype of the result may differ from the dtype of the input.
+
+        Parameters
+        ----------
+        name : str
+            Accumulation to perform, one of:
+            - cummin
+            - cummax
+            - cumsum
+            - cumprod
+        skipna : bool, default True
+            If True, skip NA values (forwarded to ``pc`` as ``skip_nulls``).
+        **kwargs
+            Extra keyword arguments forwarded to the accumulation function.
+            No additional keyword is supported at the moment.
+
+        Returns
+        -------
+        array
+
+        Raises
+        ------
+        NotImplementedError : subclass does not define accumulations
+        """
+        # pandas name -> ``pc`` function name; a name without an entry is
+        # looked up on ``pc`` unchanged.
+        compute_names = {"cumsum": "cumulative_sum_checked"}
+        compute_func = getattr(pc, compute_names.get(name, name), None)
+        if compute_func is not None:
+            accumulated = compute_func(self._data, skip_nulls=skipna, **kwargs)
+            return type(self)(accumulated)
+        # ``pc`` has no attribute of that name: defer to the parent class.
+        return super()._accumulate(name, skipna=skipna, **kwargs)
+
     def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
         """
         Return a scalar result of performing the reduction operation.
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index f93cf3d6bc138..9b42b86efd0d0 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -343,6 +343,54 @@ def test_getitem_scalar(self, data):
         super().test_getitem_scalar(data)
 
 
+class TestBaseAccumulateTests(base.BaseAccumulateTests):
+    """Accumulation tests for Arrow-backed data; only ``cumsum`` may succeed."""
+
+    def check_accumulate(self, s, op_name, skipna):
+        # Run the accumulation on ``s`` first, then on a Float64 copy, and
+        # compare the two as Float64 without checking dtype.
+        accumulate = getattr(s, op_name)
+        result = accumulate(skipna=skipna).astype("Float64")
+        float_ser = s.astype("Float64")
+        expected = getattr(float_ser, op_name)(skipna=skipna)
+        self.assert_series_equal(result, expected, check_dtype=False)
+
+    @pytest.mark.parametrize("skipna", [True, False])
+    def test_accumulate_series_raises(
+        self, data, all_numeric_accumulations, skipna, request
+    ):
+        op_name = all_numeric_accumulations
+        pa_type = data.dtype.pyarrow_dtype
+        is_numeric = pa.types.is_integer(pa_type) or pa.types.is_floating(pa_type)
+        if is_numeric and op_name == "cumsum" and not pa_version_under9p0:
+            # This combination is implemented, so the ``raises`` check below
+            # is expected to fail.
+            reason = f"{all_numeric_accumulations} implemented for {pa_type}"
+            request.node.add_marker(pytest.mark.xfail(reason=reason))
+        ser = pd.Series(data)
+
+        with pytest.raises(NotImplementedError):
+            getattr(ser, op_name)(skipna=skipna)
+
+    @pytest.mark.parametrize("skipna", [True, False])
+    def test_accumulate_series(self, data, all_numeric_accumulations, skipna, request):
+        op_name = all_numeric_accumulations
+        pa_type = data.dtype.pyarrow_dtype
+        xfail_kwargs = None
+        if pa_version_under9p0 or op_name != "cumsum":
+            xfail_kwargs = {
+                "reason": f"{all_numeric_accumulations} not implemented",
+                "raises": NotImplementedError,
+            }
+        elif not pa.types.is_integer(pa_type) and not pa.types.is_floating(pa_type):
+            xfail_kwargs = {
+                "reason": f"{all_numeric_accumulations} not implemented for {pa_type}"
+            }
+        if xfail_kwargs is not None:
+            request.node.add_marker(pytest.mark.xfail(**xfail_kwargs))
+        self.check_accumulate(pd.Series(data), op_name, skipna)
+
+
 class TestBaseNumericReduce(base.BaseNumericReduceTests):
     def check_reduce(self, ser, op_name, skipna):
         pa_dtype = ser.dtype.pyarrow_dtype