pandas-dev · datajanko · Sep 13, 2019 · Sep 14, 2019 · Sep 15, 2019 · Sep 18, 2019
diff --git a/doc/source/reference/extensions.rst b/doc/source/reference/extensions.rst
@@ -32,6 +32,7 @@ objects.
    .. autosummary::
       :toctree: api/
 
+      api.extensions.ExtensionArray._accumulate
       api.extensions.ExtensionArray._concat_same_type
       api.extensions.ExtensionArray._formatter
       api.extensions.ExtensionArray._from_factorized

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -342,6 +342,7 @@ Other enhancements
 - ``compute.use_numba`` now exists as a configuration option that utilizes the numba engine when available (:issue:`33966`, :issue:`35374`)
 - :meth:`Series.plot` now supports asymmetric error bars. Previously, if :meth:`Series.plot` received a "2xN" array with error values for ``yerr`` and/or ``xerr``, the left/lower values (first row) were mirrored, while the right/upper values (second row) were ignored. Now, the first row represents the left/lower error values and the second row the right/upper error values. (:issue:`9536`)
 
+
 .. ---------------------------------------------------------------------------
 
 .. _whatsnew_110.notable_bug_fixes:

diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -1005,6 +1005,17 @@ def all_logical_operators(request):
     return request.param
 
 
+_all_numeric_accumulations = [
+    "cumsum",
+    "cumprod",
+    "cummin",
+    "cummax",
+]
+
+
+@pytest.fixture(params=_all_numeric_accumulations)
+def all_numeric_accumulations(request):
+    """
+    Fixture parametrized over the numeric accumulation names:
+    ``cumsum``, ``cumprod``, ``cummin`` and ``cummax``.
+    """
+    accumulation_name = request.param
+    return accumulation_name
+
+
 # ----------------------------------------------------------------
 # Data sets/files
 # ----------------------------------------------------------------

diff --git a/pandas/core/array_algos/datetimelike_accumulations.py b/pandas/core/array_algos/datetimelike_accumulations.py
@@ -0,0 +1,69 @@
+from typing import Callable
+
+import numpy as np
+
+from pandas._libs import iNaT
+
+from pandas.core.dtypes.missing import isna
+
+"""
+datetimelike_accumulations.py is for accumulations of datetimelike extension arrays
+"""
+
+
+def _cum_func(
+    func: Callable,
+    values: np.ndarray,
+    *,
+    skipna: bool = True,
+):
+    """
+    Accumulations for 1D datetimelike arrays.
+
+    Parameters
+    ----------
+    func : np.cumsum, np.cumprod, np.maximum.accumulate, np.minimum.accumulate
+    values : np.ndarray
+        Numpy array with the values; it must be viewable as int64 (datetime64,
+        timedelta64 or int64 data using ``iNaT`` for missing entries).
+        The array is modified in place: missing entries are overwritten with
+        a fill value, so pass a copy if the original must be preserved.
+    skipna : bool, default True
+        Whether to skip NA. If False, every entry from the first NA onwards
+        is NA in the result.
+
+    Returns
+    -------
+    np.ndarray
+        int64 array with the accumulated values; missing entries hold ``iNaT``.
+
+    Raises
+    ------
+    ValueError
+        If ``func`` is not one of the supported accumulation functions.
+    """
+    try:
+        # Neutral element of each accumulation, so filled NAs do not affect it
+        fill_value = {
+            np.cumprod: 1,
+            np.maximum.accumulate: np.iinfo(np.int64).min,
+            np.cumsum: 0,
+            np.minimum.accumulate: np.iinfo(np.int64).max,
+        }[func]
+    except KeyError as err:
+        raise ValueError(
+            f"No accumulation for {func} implemented on datetimelike arrays"
+        ) from err
+
+    y = values.view("i8")
+    # isna() reports no missing values for plain int64 input, so also compare
+    # the int64 view against the iNaT sentinel directly.
+    mask = isna(values) | (y == iNaT)
+    y[mask] = fill_value
+
+    if not skipna:
+        # Propagate NA forward: once an NA is seen, all later entries are NA.
+        # This is the same as the implementation for masked arrays (the
+        # original author notes it differs from the earlier datetimelike one).
+        mask = np.maximum.accumulate(mask)
+
+    result = func(y)
+    result[mask] = iNaT
+    return result
+
+
+def cumsum(values: np.ndarray, *, skipna: bool = True):
+    """
+    Cumulative sum of ``values``: ``_cum_func`` applied with ``np.cumsum``.
+    """
+    return _cum_func(func=np.cumsum, values=values, skipna=skipna)
+
+
+def cumprod(values: np.ndarray, *, skipna: bool = True):
+    """
+    Cumulative product of ``values``: ``_cum_func`` applied with ``np.cumprod``.
+    """
+    return _cum_func(func=np.cumprod, values=values, skipna=skipna)
+
+
+def cummin(values: np.ndarray, *, skipna: bool = True):
+    """
+    Cumulative minimum of ``values``: ``_cum_func`` applied with
+    ``np.minimum.accumulate``.
+    """
+    return _cum_func(func=np.minimum.accumulate, values=values, skipna=skipna)
+
+
+def cummax(values: np.ndarray, *, skipna: bool = True):
+    """
+    Cumulative maximum of ``values``: ``_cum_func`` applied with
+    ``np.maximum.accumulate``.
+    """
+    return _cum_func(func=np.maximum.accumulate, values=values, skipna=skipna)
diff --git a/pandas/core/array_algos/masked_accumulations.py b/pandas/core/array_algos/masked_accumulations.py
@@ -0,0 +1,78 @@
+from typing import Callable
+
+import numpy as np
+
+from pandas.core.dtypes.common import (
+    is_float_dtype,
+    is_integer_dtype,
+)
+
+"""
+masked_accumulations.py is for accumulation algorithms using a mask-based approach
+for missing values.
+"""
+
+
+def _cum_func(
+    func: Callable,
+    values: np.ndarray,
+    mask: np.ndarray,
+    *,
+    skipna: bool = True,
+):
+    """
+    Accumulations for 1D masked array.
+
+    Parameters
+    ----------
+    func : np.cumsum, np.cumprod, np.maximum.accumulate, np.minimum.accumulate
+    values : np.ndarray
+        Numpy array with the values (integer or float dtype). The array is
+        not modified; the accumulation runs on a copy.
+    mask : np.ndarray
+        Boolean numpy array (True values indicate missing values).
+    skipna : bool, default True
+        Whether to skip NA. If False, every entry from the first NA onwards
+        is marked missing in the returned mask.
+
+    Returns
+    -------
+    tuple of np.ndarray
+        The accumulated values and the mask of the result. With
+        ``skipna=True`` the returned mask is the ``mask`` object passed in.
+
+    Raises
+    ------
+    NotImplementedError
+        If ``values`` has neither a float nor an integer dtype.
+    ValueError
+        If ``func`` is not one of the supported accumulation functions.
+    """
+    if is_float_dtype(values):
+        dtype_info = np.finfo(values.dtype.type)
+    elif is_integer_dtype(values):
+        dtype_info = np.iinfo(values.dtype.type)
+    else:
+        raise NotImplementedError(
+            f"No masked accumulation defined for dtype {values.dtype.type}"
+        )
+    try:
+        # Neutral element of each accumulation, so filled NAs do not affect it
+        fill_value = {
+            np.cumprod: 1,
+            np.maximum.accumulate: dtype_info.min,
+            np.cumsum: 0,
+            np.minimum.accumulate: dtype_info.max,
+        }[func]
+    except KeyError as err:
+        raise ValueError(
+            f"No accumulation for {func} implemented on BaseMaskedArray"
+        ) from err
+
+    # Fill on a copy: callers may hand over the backing data of a live array,
+    # which must not be overwritten.
+    values = values.copy()
+    values[mask] = fill_value
+
+    if not skipna:
+        # Once an NA is seen, all later entries are NA as well
+        mask = np.maximum.accumulate(mask)
+
+    values = func(values)
+    return values, mask
+
+
+def cumsum(values: np.ndarray, mask: np.ndarray, *, skipna: bool = True):
+    """
+    Masked cumulative sum: ``_cum_func`` applied with ``np.cumsum``.
+    """
+    return _cum_func(func=np.cumsum, values=values, mask=mask, skipna=skipna)
+
+
+def cumprod(values: np.ndarray, mask: np.ndarray, *, skipna: bool = True):
+    """
+    Masked cumulative product: ``_cum_func`` applied with ``np.cumprod``.
+    """
+    return _cum_func(func=np.cumprod, values=values, mask=mask, skipna=skipna)
+
+
+def cummin(values: np.ndarray, mask: np.ndarray, *, skipna: bool = True):
+    """
+    Masked cumulative minimum: ``_cum_func`` applied with
+    ``np.minimum.accumulate``.
+    """
+    return _cum_func(
+        func=np.minimum.accumulate, values=values, mask=mask, skipna=skipna
+    )
+
+
+def cummax(values: np.ndarray, mask: np.ndarray, *, skipna: bool = True):
+    """
+    Masked cumulative maximum: ``_cum_func`` applied with
+    ``np.maximum.accumulate``.
+    """
+    return _cum_func(
+        func=np.maximum.accumulate, values=values, mask=mask, skipna=skipna
+    )
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -108,6 +108,7 @@ class ExtensionArray:
     take
     unique
     view
+    _accumulate
     _concat_same_type
     _formatter
     _from_factorized
@@ -157,8 +158,9 @@ class ExtensionArray:
     as they only compose abstract methods. Still, a more efficient
     implementation may be available, and these methods can be overridden.
 
-    One can implement methods to handle array reductions.
+    One can implement methods to handle array accumulations or reductions.
 
+    * _accumulate
     * _reduce
 
     One can implement methods to handle parsing from strings that will be used
@@ -1253,6 +1255,37 @@ def _concat_same_type(
     # of objects
     _can_hold_na = True
 
+    def _accumulate(
+        self: ExtensionArray, name: str, *, skipna=True, **kwargs
+    ) -> ExtensionArray:
+        """
+        Perform an accumulation and return the result as an ExtensionArray.
+
+        This base implementation supports no accumulation and always raises;
+        subclasses that support accumulations override it. The dtype of the
+        result may differ from the dtype of the array.
+
+        Parameters
+        ----------
+        name : str
+            Name of the accumulation to perform. Supported values are:
+
+            - cummin
+            - cummax
+            - cumsum
+            - cumprod
+        skipna : bool, default True
+            If True, skip NA values.
+        **kwargs
+            Additional keyword arguments passed to the accumulation function.
+            Currently, there is no supported kwarg.
+
+        Returns
+        -------
+        array
+
+        Raises
+        ------
+        NotImplementedError : subclass does not define accumulations
+        """
+        message = f"cannot perform {name} with type {self.dtype}"
+        raise NotImplementedError(message)
+
     def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
         """
         Return a scalar result of performing the reduction operation.

diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
@@ -712,6 +712,15 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
 
         return super()._reduce(name, skipna=skipna, **kwargs)
 
+    def _accumulate(
+        self, name: str, *, skipna: bool = True, **kwargs
+    ) -> BaseMaskedArray:
+        """
+        Accumulate by casting the boolean data to integers and delegating to
+        ``IntegerArray._accumulate`` with the existing mask.
+        """
+        # NOTE(review): function-level import, presumably to avoid a circular
+        # import -- confirm
+        from pandas.core.arrays import IntegerArray
+
+        as_integers = IntegerArray(self._data.astype(int), self._mask)
+        return as_integers._accumulate(name, skipna=skipna, **kwargs)
+
     def _maybe_mask_result(self, result, mask, other, op_name: str):
         """
         Parameters

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -93,6 +93,7 @@
     isin,
     unique1d,
 )
+from pandas.core.array_algos import datetimelike_accumulations
 from pandas.core.arraylike import OpsMixin
 from pandas.core.arrays._mixins import (
     NDArrayBackedExtensionArray,
@@ -1204,6 +1205,22 @@ def _time_shift(self, periods, freq=None):
         #  to be passed explicitly.
         return self._generate_range(start=start, end=end, periods=None, freq=self.freq)
 
+    def _accumulate(
+        self, name: str, *, skipna: bool = True, **kwargs
+    ) -> DatetimeLikeArrayT:
+        """
+        Return the cumulative minimum or maximum as a new array of the same type.
+
+        Parameters
+        ----------
+        name : str
+            Name of the accumulation; only "cummin" and "cummax" are supported.
+        skipna : bool, default True
+            If True, skip NA values.
+        **kwargs
+            Additional keyword arguments passed to the accumulation function.
+
+        Returns
+        -------
+        Same type as self
+            Built from the accumulated data with ``self.dtype`` and no freq.
+
+        Raises
+        ------
+        NotImplementedError
+            If ``name`` is neither "cummin" nor "cummax".
+        """
+        if name not in {"cummin", "cummax"}:
+            raise NotImplementedError(
+                f"Accumulation {name} not implemented for {type(self)}"
+            )
+
+        op = getattr(datetimelike_accumulations, name)
+        # The accumulation helper overwrites its input, so hand over a copy
+        data = op(self._data.copy(), skipna=skipna, **kwargs)
+
+        # A running min/max does not in general follow the original freq
+        # (e.g. cummin of an increasing range is constant), so drop it.
+        return type(self)._simple_new(data, freq=None, dtype=self.dtype)
+
     @unpack_zerodim_and_defer("__add__")
     def __add__(self, other):
         other_dtype = getattr(other, "dtype", None)

diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
@@ -49,7 +49,10 @@
     isin,
     take,
 )
-from pandas.core.array_algos import masked_reductions
+from pandas.core.array_algos import (
+    masked_accumulations,
+    masked_reductions,
+)
 from pandas.core.arraylike import OpsMixin
 from pandas.core.arrays import ExtensionArray
 from pandas.core.indexers import check_array_indexer
@@ -457,3 +460,19 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
             return libmissing.NA
 
         return result
+
+    def _accumulate(
+        self, name: str, *, skipna: bool = True, **kwargs
+    ) -> BaseMaskedArray:
+        """
+        Perform a masked accumulation and return a new array of the same type.
+
+        Parameters
+        ----------
+        name : str
+            Name of the accumulation: "cumsum", "cumprod", "cummin" or "cummax".
+        skipna : bool, default True
+            If True, skip NA values.
+        **kwargs
+            Additional keyword arguments passed to the accumulation function.
+
+        Returns
+        -------
+        BaseMaskedArray
+            Array of the same type as self built from the accumulated data
+            and the resulting mask.
+
+        Raises
+        ------
+        NotImplementedError
+            If ``name`` is not one of the supported accumulations.
+        """
+        if name not in {"cumsum", "cumprod", "cummin", "cummax"}:
+            raise NotImplementedError(
+                f"Accumulation {name} not implemented for BaseMaskedArray"
+            )
+
+        op = getattr(masked_accumulations, name)
+        data, mask = op(self._data, self._mask, skipna=skipna, **kwargs)
+
+        return type(self)(data, mask, copy=False)
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
@@ -57,6 +57,7 @@
 
 from pandas.core import nanops
 from pandas.core.algorithms import checked_add_with_arr
+from pandas.core.array_algos import datetimelike_accumulations
 from pandas.core.arrays import (
     IntegerArray,
     datetimelike as dtl,
@@ -403,6 +404,24 @@ def std(
             return self._box_func(result)
         return self._from_backing_data(result)
 
+    # ----------------------------------------------------------------
+    # Accumulations
+
+    def _accumulate(
+        self, name: str, *, skipna: bool = True, **kwargs
+    ) -> TimedeltaArray:
+        """
+        Perform an accumulation, adding support for "cumsum".
+
+        Parameters
+        ----------
+        name : str
+            Name of the accumulation. "cumsum" is handled here; any other name
+            is passed on to the parent class implementation.
+        skipna : bool, default True
+            If True, skip NA values.
+        **kwargs
+            Additional keyword arguments passed to the accumulation function.
+
+        Returns
+        -------
+        TimedeltaArray
+            For "cumsum", a new array with ``self.dtype`` and no freq.
+        """
+        if name != "cumsum":
+            # Anything else is decided by the parent class
+            return super()._accumulate(name, skipna=skipna, **kwargs)
+
+        op = getattr(datetimelike_accumulations, name)
+        # The accumulation helper overwrites its input, so hand over a copy
+        data = op(self._data.copy(), skipna=skipna, **kwargs)
+
+        return type(self)._simple_new(data, freq=None, dtype=self.dtype)
+
     # ----------------------------------------------------------------
     # Rendering Methods
 

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -10320,7 +10320,14 @@ def _accum_func(self, name: str, func, axis=None, skipna=True, *args, **kwargs):
         def block_accum_func(blk_values):
+            # Accumulate one block of values: extension arrays go through their
+            # own ``_accumulate``, everything else through ``nanops.na_accum_func``.
             values = blk_values.T if hasattr(blk_values, "T") else blk_values
 
-            result = nanops.na_accum_func(values, func, skipna=skipna)
+            # NOTE(review): function-level import, presumably to avoid a circular
+            # import -- confirm
+            from pandas.core.construction import ensure_wrapped_if_datetimelike
+
+            values = ensure_wrapped_if_datetimelike(values)
+
+            if isinstance(values, ExtensionArray):
+                result = values._accumulate(name, skipna=skipna, **kwargs)
+            else:
+                result = nanops.na_accum_func(values, func, skipna=skipna)
 
             result = result.T if hasattr(result, "T") else result
             return result

diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py
@@ -41,6 +41,10 @@ class TestMyDtype(BaseDtypeTests):
 ``assert_series_equal`` on your base test class.
 
 """
+from pandas.tests.extension.base.accumulate import (  # noqa
+    BaseNoAccumulateTests,
+    BaseNumericAccumulateTests,
+)
 from pandas.tests.extension.base.casting import BaseCastingTests  # noqa
 from pandas.tests.extension.base.constructors import BaseConstructorsTests  # noqa
 from pandas.tests.extension.base.dtype import BaseDtypeTests  # noqa