diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index f53135174741e..427b1bfc28ba5 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -30,7 +30,7 @@
 
 from pandas._config import config
 
-from pandas._libs import Timestamp, iNaT, lib
+from pandas._libs import Timestamp, lib
 from pandas._typing import (
     Axis,
     FilePathOrBuffer,
@@ -10102,8 +10102,6 @@ def mad(self, axis=None, skipna=None, level=None):
             desc="minimum",
             accum_func=np.minimum.accumulate,
             accum_func_name="min",
-            mask_a=np.inf,
-            mask_b=np.nan,
             examples=_cummin_examples,
         )
         cls.cumsum = _make_cum_function(
@@ -10115,8 +10113,6 @@ def mad(self, axis=None, skipna=None, level=None):
             desc="sum",
             accum_func=np.cumsum,
             accum_func_name="sum",
-            mask_a=0.0,
-            mask_b=np.nan,
             examples=_cumsum_examples,
         )
         cls.cumprod = _make_cum_function(
@@ -10128,8 +10124,6 @@ def mad(self, axis=None, skipna=None, level=None):
             desc="product",
             accum_func=np.cumprod,
             accum_func_name="prod",
-            mask_a=1.0,
-            mask_b=np.nan,
             examples=_cumprod_examples,
         )
         cls.cummax = _make_cum_function(
@@ -10141,8 +10135,6 @@ def mad(self, axis=None, skipna=None, level=None):
             desc="maximum",
             accum_func=np.maximum.accumulate,
             accum_func_name="max",
-            mask_a=-np.inf,
-            mask_b=np.nan,
             examples=_cummax_examples,
         )
 
@@ -11182,8 +11174,6 @@ def _make_cum_function(
     desc: str,
     accum_func: Callable,
     accum_func_name: str,
-    mask_a: float,
-    mask_b: float,
     examples: str,
 ) -> Callable:
     @Substitution(
@@ -11205,61 +11195,18 @@ def cum_func(self, axis=None, skipna=True, *args, **kwargs):
         if axis == 1:
             return cum_func(self.T, axis=0, skipna=skipna, *args, **kwargs).T
 
-        def na_accum_func(blk_values):
-            # We will be applying this function to block values
-            if blk_values.dtype.kind in ["m", "M"]:
-                # GH#30460, GH#29058
-                # numpy 1.18 started sorting NaTs at the end instead of beginning,
-                # so we need to work around to maintain backwards-consistency.
-                orig_dtype = blk_values.dtype
-
-                # We need to define mask before masking NaTs
-                mask = isna(blk_values)
-
-                if accum_func == np.minimum.accumulate:
-                    # Note: the accum_func comparison fails as an "is" comparison
-                    y = blk_values.view("i8")
-                    y[mask] = np.iinfo(np.int64).max
-                    changed = True
-                else:
-                    y = blk_values
-                    changed = False
-
-                result = accum_func(y.view("i8"), axis)
-                if skipna:
-                    np.putmask(result, mask, iNaT)
-                elif accum_func == np.minimum.accumulate:
-                    # Restore NaTs that we masked previously
-                    nz = (~np.asarray(mask)).nonzero()[0]
-                    if len(nz):
-                        # everything up to the first non-na entry stays NaT
-                        result[: nz[0]] = iNaT
-
-                if changed:
-                    # restore NaT elements
-                    y[mask] = iNaT  # TODO: could try/finally for this?
-
-                if isinstance(blk_values, np.ndarray):
-                    result = result.view(orig_dtype)
-                else:
-                    # DatetimeArray
-                    result = type(blk_values)._from_sequence(result, dtype=orig_dtype)
-
-            elif skipna and not issubclass(
-                blk_values.dtype.type, (np.integer, np.bool_)
-            ):
-                vals = blk_values.copy().T
-                mask = isna(vals)
-                np.putmask(vals, mask, mask_a)
-                result = accum_func(vals, axis)
-                np.putmask(result, mask, mask_b)
-            else:
-                result = accum_func(blk_values.T, axis)
+        def block_accum_func(blk_values):
+            # nanops.na_accum_func accumulates along axis 0: values exposing
+            # .T (ndarray blocks) are transposed, others pass through unchanged
+            values = blk_values.T if hasattr(blk_values, "T") else blk_values
 
+            result = nanops.na_accum_func(values, accum_func, skipna=skipna)
+
             # transpose back for ndarray, not for EA
-            return result.T if hasattr(result, "T") else result
+            result = result.T if hasattr(result, "T") else result
+            return result
 
-        result = self._data.apply(na_accum_func)
+        result = self._data.apply(block_accum_func)
 
         d = self._construct_axes_dict()
         d["copy"] = False
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index 269843abb15ee..a5e70bd279d21 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -8,7 +8,7 @@
 from pandas._config import get_option
 
 from pandas._libs import NaT, Period, Timedelta, Timestamp, iNaT, lib
-from pandas._typing import Dtype, Scalar
+from pandas._typing import ArrayLike, Dtype, Scalar
 from pandas.compat._optional import import_optional_dependency
 
 from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask
@@ -1500,3 +1500,89 @@ def nanpercentile(
             return result
     else:
         return np.percentile(values, q, axis=axis, interpolation=interpolation)
+
+
+def na_accum_func(values: ArrayLike, accum_func, skipna: bool) -> ArrayLike:
+    """
+    Cumulative function with skipna support.
+
+    The accumulation always runs along axis 0 of ``values``.
+
+    Parameters
+    ----------
+    values : np.ndarray or ExtensionArray
+    accum_func : {np.cumprod, np.maximum.accumulate, np.cumsum, np.minimum.accumulate}
+    skipna : bool
+
+    Returns
+    -------
+    np.ndarray or ExtensionArray
+
+    Raises
+    ------
+    KeyError
+        If ``accum_func`` is not one of the four supported functions.
+    """
+    # Fill values used on the skipna path: ``mask_a`` is written over missing
+    # entries before accumulating so they cannot affect the result, and
+    # ``mask_b`` is written back into those positions afterwards.
+    mask_a, mask_b = {
+        np.cumprod: (1.0, np.nan),
+        np.maximum.accumulate: (-np.inf, np.nan),
+        np.cumsum: (0.0, np.nan),
+        np.minimum.accumulate: (np.inf, np.nan),
+    }[accum_func]
+
+    if values.dtype.kind in ["m", "M"]:
+        # GH#30460, GH#29058
+        # numpy 1.18 started sorting NaTs at the end instead of beginning,
+        # so we need to work around to maintain backwards-consistency.
+        orig_dtype = values.dtype
+
+        # We need to define mask before masking NaTs
+        mask = isna(values)
+
+        if accum_func == np.minimum.accumulate:
+            # Note: the accum_func comparison fails as an "is" comparison
+            # Temporarily overwrite NaTs (in place, through the i8 view) with
+            # the largest int64 so that they never win the running minimum.
+            y = values.view("i8")
+            y[mask] = np.iinfo(np.int64).max
+            changed = True
+        else:
+            y = values
+            changed = False
+
+        try:
+            result = accum_func(y.view("i8"), axis=0)
+        finally:
+            if changed:
+                # restore NaT elements, even if accum_func raised, so the
+                # caller's data is never left holding the placeholder
+                y[mask] = iNaT
+
+        if skipna:
+            result[mask] = iNaT
+        elif accum_func == np.minimum.accumulate:
+            # Restore NaTs that we masked previously
+            nz = (~np.asarray(mask)).nonzero()[0]
+            if len(nz):
+                # everything up to the first non-na entry stays NaT
+                result[: nz[0]] = iNaT
+
+        if isinstance(values, np.ndarray):
+            result = result.view(orig_dtype)
+        else:
+            # DatetimeArray
+            result = type(values)._from_sequence(result, dtype=orig_dtype)
+
+    elif skipna and not issubclass(values.dtype.type, (np.integer, np.bool_)):
+        vals = values.copy()
+        mask = isna(vals)
+        vals[mask] = mask_a
+        result = accum_func(vals, axis=0)
+        result[mask] = mask_b
+    else:
+        result = accum_func(values, axis=0)
+
+    return result