diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 9b1819a7d4d9f..451cd544ef650 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -424,7 +424,7 @@ ExtensionArray - Bug in :meth:`DataFrame.where` when ``other`` is a :class:`Series` with :class:`ExtensionArray` dtype (:issue:`38729`) - Fixed bug where :meth:`Series.idxmax`, :meth:`Series.idxmin` and ``argmax/min`` fail when the underlying data is :class:`ExtensionArray` (:issue:`32749`, :issue:`33719`, :issue:`36566`) -- +- Bug in cumulative functions (``cumsum``, ``cumprod``, ``cummax`` and ``cummin``) with extension dtypes not handling ``NA`` correctly and returning object dtype (:issue:`39479`) Other ^^^^^ diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 8d3363df0d132..adb1fc3dc7edd 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -18,6 +18,7 @@ is_bool_dtype, is_complex, is_datetime64_any_dtype, + is_extension_array_dtype, is_float, is_float_dtype, is_integer, @@ -32,7 +33,7 @@ from pandas.core.dtypes.dtypes import PeriodDtype from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna -from pandas.core.construction import extract_array +from pandas.core.construction import extract_array, sanitize_array bn = import_optional_dependency("bottleneck", errors="warn") _BOTTLENECK_INSTALLED = bn is not None @@ -1728,6 +1729,21 @@ def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike: result, dtype=orig_dtype ) + elif is_extension_array_dtype(values.dtype): + if is_integer_dtype(values.dtype) and np.isinf(mask_a): + mask_a = { + np.maximum.accumulate: np.iinfo(values.dtype.type).min, + np.minimum.accumulate: np.iinfo(values.dtype.type).max, + }[accum_func] + + vals = values.copy() + mask = isna(vals) + mask_copy = np.copy(mask) + vals[mask] = mask_a + result = accum_func(vals, axis=0) + result[mask_copy] = mask_b + result = sanitize_array(result, None, values.dtype) + elif skipna and not 
@pytest.mark.parametrize(
    "func, exp",
    [
        ("cumsum", [2, NA, 7, 6, 6]),
        ("cumprod", [2, NA, 10, -10, 0]),
        ("cummin", [2, NA, 2, -1, -1]),
        ("cummax", [2, NA, 5, 5, 5]),
    ],
)
@pytest.mark.parametrize("dtype", ["Float64", "Int64"])
def test_cummulative_ops_extension_dtype(self, frame_or_series, dtype, func, exp):
    """
    Check cumulative methods on containers built with an extension dtype.

    The input ``[2, nan, 5, -1, 0]`` is constructed with ``dtype`` and the
    method named by ``func`` is called with no arguments. The outcome must
    equal ``exp`` constructed with the same ``dtype``: every expected list
    has ``NA`` in the second slot only, and the values after it continue
    the accumulation as if that slot had been skipped.

    Parameters
    ----------
    frame_or_series : callable
        Constructor used for both the input and the expected object
        (presumably the DataFrame/Series fixture -- confirm in conftest).
    dtype : str
        ``"Float64"`` or ``"Int64"``.
    func : str
        Name of the cumulative method to look up on the object.
    exp : list
        Expected values for ``func``.
    """
    # GH#39479
    wanted = frame_or_series(exp, dtype=dtype)

    data = frame_or_series([2, np.nan, 5, -1, 0], dtype=dtype)
    accumulate = getattr(data, func)
    actual = accumulate()

    tm.assert_equal(actual, wanted)