From 9ac04ebf7577a3961cd32fe5339b2a68b4b97b89 Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sat, 30 Jan 2021 18:31:29 +0100
Subject: [PATCH 1/4] BUG: cumulative functions with ea dtype not handling NA
 correctly and casting to object

---
 doc/source/whatsnew/v1.3.0.rst        |  2 +-
 pandas/core/nanops.py                 | 24 +++++++++++++++++++++---
 pandas/tests/frame/test_cumulative.py | 20 +++++++++++++++++++-
 3 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 9b1819a7d4d9f..451cd544ef650 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -424,7 +424,7 @@ ExtensionArray
 
 - Bug in :meth:`DataFrame.where` when ``other`` is a :class:`Series` with :class:`ExtensionArray` dtype (:issue:`38729`)
 - Fixed bug where :meth:`Series.idxmax`, :meth:`Series.idxmin` and ``argmax/min`` fail when the underlying data is :class:`ExtensionArray` (:issue:`32749`, :issue:`33719`, :issue:`36566`)
--
+- Bug in cumulative functions (``cumsum``, ``cumprod``, ``cummax`` and ``cummin``)  with extension dtypes not handling ``NA`` correctly and returning object dtype (:issue:`39479`)
 
 Other
 ^^^^^
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index 8d3363df0d132..442c8d740f703 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -18,6 +18,7 @@
     is_bool_dtype,
     is_complex,
     is_datetime64_any_dtype,
+    is_extension_array_dtype,
     is_float,
     is_float_dtype,
     is_integer,
@@ -32,7 +33,7 @@
 from pandas.core.dtypes.dtypes import PeriodDtype
 from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna
 
-from pandas.core.construction import extract_array
+from pandas.core.construction import extract_array, sanitize_array
 
 bn = import_optional_dependency("bottleneck", errors="warn")
 _BOTTLENECK_INSTALLED = bn is not None
@@ -1687,6 +1688,8 @@ def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike:
         np.minimum.accumulate: (np.inf, np.nan),
     }[accum_func]
 
+    is_ea_dtype = is_extension_array_dtype(values.dtype)
+
     # We will be applying this function to block values
     if values.dtype.kind in ["m", "M"]:
         # GH#30460, GH#29058
@@ -1728,12 +1731,27 @@ def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike:
                 result, dtype=orig_dtype
             )
 
-    elif skipna and not issubclass(values.dtype.type, (np.integer, np.bool_)):
+    elif (
+        skipna
+        and not issubclass(values.dtype.type, (np.integer, np.bool_))
+        or is_ea_dtype
+    ):
+        if is_integer_dtype(values.dtype) and np.isinf(mask_a):
+            mask_a = {
+                np.maximum.accumulate: np.iinfo(values.dtype.numpy_dtype).min,
+                np.minimum.accumulate: np.iinfo(values.dtype.numpy_dtype).max,
+            }[accum_func]
+
         vals = values.copy()
         mask = isna(vals)
+        mask_keep = np.copy(mask) if is_ea_dtype else mask
         vals[mask] = mask_a
         result = accum_func(vals, axis=0)
-        result[mask] = mask_b
+        result[mask_keep] = mask_b
+
+        if is_ea_dtype:
+            result = sanitize_array(result, None, values.dtype)
+
     else:
         result = accum_func(values, axis=0)
 
diff --git a/pandas/tests/frame/test_cumulative.py b/pandas/tests/frame/test_cumulative.py
index 248f3500c41df..32dda8eae2eb4 100644
--- a/pandas/tests/frame/test_cumulative.py
+++ b/pandas/tests/frame/test_cumulative.py
@@ -7,8 +7,9 @@
 """
 
 import numpy as np
+import pytest
 
-from pandas import DataFrame, Series
+from pandas import NA, DataFrame, Series
 import pandas._testing as tm
 
 
@@ -133,3 +134,20 @@ def test_cumulative_ops_preserve_dtypes(self):
             }
         )
         tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "func, exp",
+        [
+            ("cumsum", [2, NA, 7, 6, 6]),
+            ("cumprod", [2, NA, 10, -10, 0]),
+            ("cummin", [2, NA, 2, -1, -1]),
+            ("cummax", [2, NA, 5, 5, 5]),
+        ],
+    )
+    @pytest.mark.parametrize("dtype", ["Float64", "Int64"])
+    def test_cummulative_ops_extension_dtype(self, frame_or_series, dtype, func, exp):
+        # GH#39479
+        obj = frame_or_series([2, np.nan, 5, -1, 0], dtype=dtype)
+        result = getattr(obj, func)()
+        expected = frame_or_series(exp, dtype=dtype)
+        tm.assert_equal(result, expected)

From 8d212a61a7426a686a7c2d0ea01f48781dd9c952 Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sat, 30 Jan 2021 19:44:16 +0100
Subject: [PATCH 2/4] Fix mypy issues

---
 pandas/core/nanops.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index 442c8d740f703..5e4cc031deb99 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -1738,8 +1738,8 @@ def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike:
     ):
         if is_integer_dtype(values.dtype) and np.isinf(mask_a):
             mask_a = {
-                np.maximum.accumulate: np.iinfo(values.dtype.numpy_dtype).min,
-                np.minimum.accumulate: np.iinfo(values.dtype.numpy_dtype).max,
+                np.maximum.accumulate: np.iinfo(values.dtype.type).min,
+                np.minimum.accumulate: np.iinfo(values.dtype.type).max,
             }[accum_func]
 
         vals = values.copy()

From ce49358b07694cd13023d1cada0b1c35c53f847c Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sat, 30 Jan 2021 22:24:37 +0100
Subject: [PATCH 3/4] Refactor elif

---
 pandas/core/nanops.py | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index 5e4cc031deb99..4cfdac77ad63b 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -1688,8 +1688,6 @@ def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike:
         np.minimum.accumulate: (np.inf, np.nan),
     }[accum_func]
 
-    is_ea_dtype = is_extension_array_dtype(values.dtype)
-
     # We will be applying this function to block values
     if values.dtype.kind in ["m", "M"]:
         # GH#30460, GH#29058
@@ -1731,11 +1729,7 @@ def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike:
                 result, dtype=orig_dtype
             )
 
-    elif (
-        skipna
-        and not issubclass(values.dtype.type, (np.integer, np.bool_))
-        or is_ea_dtype
-    ):
+    elif is_extension_array_dtype(values.dtype):
         if is_integer_dtype(values.dtype) and np.isinf(mask_a):
             mask_a = {
                 np.maximum.accumulate: np.iinfo(values.dtype.type).min,
@@ -1744,13 +1738,18 @@ def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike:
 
         vals = values.copy()
         mask = isna(vals)
-        mask_keep = np.copy(mask) if is_ea_dtype else mask
+        mask_copy = np.copy(mask)
         vals[mask] = mask_a
         result = accum_func(vals, axis=0)
-        result[mask_keep] = mask_b
+        result[mask_copy] = mask_b
+        result = sanitize_array(result, None, values.dtype)
 
-        if is_ea_dtype:
-            result = sanitize_array(result, None, values.dtype)
+    elif skipna and not issubclass(values.dtype.type, (np.integer, np.bool_)):
+        vals = values.copy()
+        mask = isna(vals)
+        vals[mask] = mask_a
+        result = accum_func(vals, axis=0)
+        result[mask] = mask_b
 
     else:
         result = accum_func(values, axis=0)

From eb29bf2c7d888f335ac3fcd9433b4931bb694c0d Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sat, 30 Jan 2021 22:25:29 +0100
Subject: [PATCH 4/4] Remove empty line

---
 pandas/core/nanops.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index 4cfdac77ad63b..adb1fc3dc7edd 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -1750,7 +1750,6 @@ def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike:
         vals[mask] = mask_a
         result = accum_func(vals, axis=0)
         result[mask] = mask_b
-
     else:
         result = accum_func(values, axis=0)