From 97d7e11082270d5813fc5c700746bd753c410b09 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Sun, 6 Jun 2021 16:56:17 -0400
Subject: [PATCH 01/11] precommit fixup

---
 pandas/_libs/groupby.pyx | 89 ++++++++++++++++++++++++++++------------
 1 file changed, 63 insertions(+), 26 deletions(-)
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index b72b927b3c2a8..40933c8f30319 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -1345,13 +1345,8 @@ cdef group_cummin_max(groupby_t[:, ::1] out,
     This method modifies the `out` parameter, rather than returning an object.
     """
     cdef:
-        Py_ssize_t i, j, N, K, size
-        groupby_t val, mval
+        Py_ssize_t N, K
         groupby_t[:, ::1] accum
-        intp_t lab
-        bint val_is_nan, use_mask
-
-    use_mask = mask is not None
 
     N, K = (<object>values).shape
     accum = np.empty((ngroups, K), dtype=values.dtype)
@@ -1362,36 +1357,78 @@ cdef group_cummin_max(groupby_t[:, ::1] out,
     else:
         accum[:] = -np.inf if compute_max else np.inf
 
+    if mask is not None:
+        masked_cummin_max(out, values, mask, labels, accum, N, K, compute_max)
+    else:
+        cummin_max(out, values, labels, accum, N, K, is_datetimelike, compute_max)
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+cdef cummin_max(groupby_t[:, ::1] out,
+                ndarray[groupby_t, ndim=2] values,
+                const intp_t[:] labels,
+                groupby_t[:, ::1] accum,
+                Py_ssize_t N,
+                Py_ssize_t K,
+                bint is_datetimelike,
+                bint compute_max):
+    """
+    Compute the cumulative minimum/maximum of columns of `values`, in row groups
+    `labels`.
+    """
+    cdef:
+        Py_ssize_t i, j
+        groupby_t val, mval
+        intp_t lab
+
     with nogil:
         for i in range(N):
             lab = labels[i]
-
             if lab < 0:
                 continue
             for j in range(K):
-                val_is_nan = False
-
-                if use_mask:
-                    if mask[i, j]:
-
-                        # `out` does not need to be set since it
-                        # will be masked anyway
-                        val_is_nan = True
+                val = values[i, j]
+                if not _treat_as_na(val, is_datetimelike):
+                    mval = accum[lab, j]
+                    if compute_max:
+                        if val > mval:
+                            accum[lab, j] = mval = val
                     else:
+                        if val < mval:
+                            accum[lab, j] = mval = val
+                    out[i, j] = mval
+                else:
+                    out[i, j] = val
 
-                        # If using the mask, we can avoid grabbing the
-                        # value unless necessary
-                        val = values[i, j]
 
-                # Otherwise, `out` must be set accordingly if the
-                # value is missing
-                else:
-                    val = values[i, j]
-                    if _treat_as_na(val, is_datetimelike):
-                        val_is_nan = True
-                        out[i, j] = val
+@cython.boundscheck(False)
+@cython.wraparound(False)
+cdef masked_cummin_max(groupby_t[:, ::1] out,
+                       ndarray[groupby_t, ndim=2] values,
+                       uint8_t[:, ::1] mask,
+                       const intp_t[:] labels,
+                       groupby_t[:, ::1] accum,
+                       Py_ssize_t N,
+                       Py_ssize_t K,
+                       bint compute_max):
+    """
+    Compute the cumulative minimum/maximum of columns of `values`, in row groups
+    `labels` with a masked algorithm.
+    """
+    cdef:
+        Py_ssize_t i, j
+        groupby_t val, mval
+        intp_t lab
 
-                if not val_is_nan:
+    with nogil:
+        for i in range(N):
+            lab = labels[i]
+            if lab < 0:
+                continue
+            for j in range(K):
+                if not mask[i, j]:
+                    val = values[i, j]
                     mval = accum[lab, j]
                     if compute_max:
                         if val > mval:

From ee7a7ef627c475faa3a6e3b45a91ab204a8cf84a Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Sun, 6 Jun 2021 18:51:32 -0400
Subject: [PATCH 02/11] precommit fixup

---
 doc/source/whatsnew/v1.3.0.rst        |  1 +
 pandas/_libs/groupby.pyx              | 88 +++++++++++++++++----------
 pandas/core/groupby/groupby.py        | 20 ++++--
 pandas/tests/groupby/test_function.py | 10 +++
 4 files changed, 84 insertions(+), 35 deletions(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index ce613fd78c1e1..44c9a39d30932 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -230,6 +230,7 @@ Other enhancements
 - :meth:`.GroupBy.any` and :meth:`.GroupBy.all` use Kleene logic with nullable data types (:issue:`37506`)
 - :meth:`.GroupBy.any` and :meth:`.GroupBy.all` return a ``BooleanDtype`` for columns with nullable data types (:issue:`33449`)
 - :meth:`.GroupBy.rank` now supports object-dtype data (:issue:`38278`)
+- :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` now support the argument ``skipna`` (:issue:`34047`)
 - Constructing a :class:`DataFrame` or :class:`Series` with the ``data`` argument being a Python iterable that is *not* a NumPy ``ndarray`` consisting of NumPy scalars will now result in a dtype with a precision the maximum of the NumPy scalars; this was already the case when ``data`` is a NumPy ``ndarray`` (:issue:`40908`)
 - Add keyword ``sort`` to :func:`pivot_table` to allow non-sorting of the result (:issue:`39143`)
 - Add keyword ``dropna`` to :meth:`DataFrame.value_counts` to allow counting rows that include ``NA`` values (:issue:`41325`)
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 40933c8f30319..75ac94975e8b7 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -1317,6 +1317,7 @@ cdef group_cummin_max(groupby_t[:, ::1] out,
                       const intp_t[:] labels,
                       int ngroups,
                       bint is_datetimelike,
+                      bint skipna,
                       bint compute_max):
     """
     Cumulative minimum/maximum of columns of `values`, in row groups `labels`.
@@ -1336,6 +1337,9 @@ cdef group_cummin_max(groupby_t[:, ::1] out,
         Number of groups, larger than all entries of `labels`.
     is_datetimelike : bool
         True if `values` contains datetime-like entries.
+    skipna : bool
+        If True, ignore nans in `values`.
+
     compute_max : bool
         True if cumulative maximum should be computed, False
         if cumulative minimum should be computed
@@ -1358,9 +1362,9 @@ cdef group_cummin_max(groupby_t[:, ::1] out,
         accum[:] = -np.inf if compute_max else np.inf
 
     if mask is not None:
-        masked_cummin_max(out, values, mask, labels, accum, N, K, compute_max)
+        masked_cummin_max(out, values, mask, labels, accum, skipna, compute_max)
     else:
-        cummin_max(out, values, labels, accum, N, K, is_datetimelike, compute_max)
+        cummin_max(out, values, labels, accum, skipna, is_datetimelike, compute_max)
 
 
 @cython.boundscheck(False)
@@ -1369,8 +1373,7 @@ cdef cummin_max(groupby_t[:, ::1] out,
                 ndarray[groupby_t, ndim=2] values,
                 const intp_t[:] labels,
                 groupby_t[:, ::1] accum,
-                Py_ssize_t N,
-                Py_ssize_t K,
+                bint skipna,
                 bint is_datetimelike,
                 bint compute_max):
     """
@@ -1378,28 +1381,40 @@ cdef cummin_max(groupby_t[:, ::1] out,
     `labels`.
     """
     cdef:
-        Py_ssize_t i, j
-        groupby_t val, mval
+        Py_ssize_t i, j, N, K
+        groupby_t val, mval, na_val
+        uint8_t[::1] seen_na
         intp_t lab
 
+    if groupby_t is float64_t or groupby_t is float32_t:
+        na_val = NaN
+    elif is_datetimelike:
+        na_val = NPY_NAT
+
+    seen_na = np.zeros((<object>values).shape[0], dtype=np.uint8)
+    N, K = (<object>values).shape
     with nogil:
         for i in range(N):
             lab = labels[i]
             if lab < 0:
                 continue
             for j in range(K):
-                val = values[i, j]
-                if not _treat_as_na(val, is_datetimelike):
-                    mval = accum[lab, j]
-                    if compute_max:
-                        if val > mval:
-                            accum[lab, j] = mval = val
-                    else:
-                        if val < mval:
-                            accum[lab, j] = mval = val
-                    out[i, j] = mval
+                if not skipna and seen_na[lab]:
+                    out[i, j] = na_val
                 else:
-                    out[i, j] = val
+                    val = values[i, j]
+                    if not _treat_as_na(val, is_datetimelike):
+                        mval = accum[lab, j]
+                        if compute_max:
+                            if val > mval:
+                                accum[lab, j] = mval = val
+                        else:
+                            if val < mval:
+                                accum[lab, j] = mval = val
+                        out[i, j] = mval
+                    else:
+                        seen_na[lab] = 1
+                        out[i, j] = val
 
 
 @cython.boundscheck(False)
@@ -1409,34 +1424,41 @@ cdef masked_cummin_max(groupby_t[:, ::1] out,
                        uint8_t[:, ::1] mask,
                        const intp_t[:] labels,
                        groupby_t[:, ::1] accum,
-                       Py_ssize_t N,
-                       Py_ssize_t K,
+                       bint skipna,
                        bint compute_max):
     """
     Compute the cumulative minimum/maximum of columns of `values`, in row groups
     `labels` with a masked algorithm.
     """
     cdef:
-        Py_ssize_t i, j
+        Py_ssize_t i, j, N, K
         groupby_t val, mval
+        uint8_t[::1] seen_na
         intp_t lab
 
+    seen_na = np.zeros((<object>values).shape[0], dtype=np.uint8)
+    N, K = (<object>values).shape
     with nogil:
         for i in range(N):
             lab = labels[i]
             if lab < 0:
                 continue
             for j in range(K):
-                if not mask[i, j]:
-                    val = values[i, j]
-                    mval = accum[lab, j]
-                    if compute_max:
-                        if val > mval:
-                            accum[lab, j] = mval = val
+                if not skipna and seen_na[lab]:
+                    mask[i, j] = 1
+                else:
+                    if not mask[i, j]:
+                        val = values[i, j]
+                        mval = accum[lab, j]
+                        if compute_max:
+                            if val > mval:
+                                accum[lab, j] = mval = val
+                        else:
+                            if val < mval:
+                                accum[lab, j] = mval = val
+                        out[i, j] = mval
                     else:
-                        if val < mval:
-                            accum[lab, j] = mval = val
-                    out[i, j] = mval
+                        seen_na[lab] = 1
 
 
 @cython.boundscheck(False)
@@ -1446,7 +1468,8 @@ def group_cummin(groupby_t[:, ::1] out,
                  const intp_t[:] labels,
                  int ngroups,
                  bint is_datetimelike,
-                 uint8_t[:, ::1] mask=None) -> None:
+                 uint8_t[:, ::1] mask=None,
+                 bint skipna=True) -> None:
     """See group_cummin_max.__doc__"""
     group_cummin_max(
         out,
@@ -1455,6 +1478,7 @@ def group_cummin(groupby_t[:, ::1] out,
         labels,
         ngroups,
         is_datetimelike,
+        skipna,
         compute_max=False
     )
 
@@ -1466,7 +1490,8 @@ def group_cummax(groupby_t[:, ::1] out,
                  const intp_t[:] labels,
                  int ngroups,
                  bint is_datetimelike,
-                 uint8_t[:, ::1] mask=None) -> None:
+                 uint8_t[:, ::1] mask=None,
+                 bint skipna=True) -> None:
     """See group_cummin_max.__doc__"""
     group_cummin_max(
         out,
@@ -1475,5 +1500,6 @@ def group_cummax(groupby_t[:, ::1] out,
         labels,
         ngroups,
         is_datetimelike,
+        skipna,
         compute_max=True
     )
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 6deb5bb1a76f0..61b16392c43b5 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -2729,7 +2729,7 @@ def cumsum(self, axis=0, *args, **kwargs):
     @final
     @Substitution(name="groupby")
     @Appender(_common_see_also)
-    def cummin(self, axis=0, **kwargs):
+    def cummin(self, axis=0, *args, **kwargs):
         """
         Cumulative min for each group.
 
@@ -2737,15 +2737,21 @@ def cummin(self, axis=0, **kwargs):
         -------
         Series or DataFrame
         """
+        # See groupby/test_function::test_cummin_cummax, current behavior allows
+        # but ignores numeric_only
+        if "numeric_only" in kwargs:
+            kwargs = kwargs.copy()
+            del kwargs["numeric_only"]
+        nv.validate_groupby_func("cummin", args, kwargs, ["skipna"])
         if axis != 0:
             return self.apply(lambda x: np.minimum.accumulate(x, axis))
 
-        return self._cython_transform("cummin", numeric_only=False)
+        return self._cython_transform("cummin", numeric_only=False, **kwargs)
 
     @final
     @Substitution(name="groupby")
     @Appender(_common_see_also)
-    def cummax(self, axis=0, **kwargs):
+    def cummax(self, axis=0, *args, **kwargs):
         """
         Cumulative max for each group.
 
@@ -2753,10 +2759,16 @@ def cummax(self, axis=0, **kwargs):
         -------
         Series or DataFrame
         """
+        # See groupby/test_function::test_cummin_cummax, current behavior allows
+        # but ignores numeric_only
+        if "numeric_only" in kwargs:
+            kwargs = kwargs.copy()
+            del kwargs["numeric_only"]
+        nv.validate_groupby_func("cummax", args, kwargs, ["skipna"])
         if axis != 0:
             return self.apply(lambda x: np.maximum.accumulate(x, axis))
 
-        return self._cython_transform("cummax", numeric_only=False)
+        return self._cython_transform("cummax", numeric_only=False, **kwargs)
 
     @final
     def _get_cythonized_result(
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 95bb010015f62..b79eb15c1e835 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -904,6 +904,16 @@ def test_cummax(dtypes_for_minmax):
     tm.assert_series_equal(result, expected)
 
 
+@pytest.mark.parametrize("method", ["cummin", "cummax"])
+@pytest.mark.parametrize("dtype", ["float", "Int64", "Float64"])
+def test_cummin_max_skipna(method, dtype):
+    # GH-34047
+    df = DataFrame({"a": Series([1, None, 1], dtype=dtype)})
+    result = getattr(df.groupby([1, 1, 1])["a"], method)(skipna=False)
+    expected = Series([1, None, None], dtype=dtype, name="a")
+    tm.assert_series_equal(result, expected)
+
+
 @td.skip_if_32bit
 @pytest.mark.parametrize("method", ["cummin", "cummax"])
 @pytest.mark.parametrize(

From 885f8ae956e918c6cfe360bd2cb66aebbd5879cc Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Sun, 6 Jun 2021 20:18:47 -0400
Subject: [PATCH 03/11] wip

---
 pandas/_libs/groupby.pyx | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 40933c8f30319..ee57ff5c2a205 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -1358,9 +1358,9 @@ cdef group_cummin_max(groupby_t[:, ::1] out,
         accum[:] = -np.inf if compute_max else np.inf
 
     if mask is not None:
-        masked_cummin_max(out, values, mask, labels, accum, N, K, compute_max)
+        masked_cummin_max(out, values, mask, labels, accum, compute_max)
     else:
-        cummin_max(out, values, labels, accum, N, K, is_datetimelike, compute_max)
+        cummin_max(out, values, labels, accum, is_datetimelike, compute_max)
 
 
 @cython.boundscheck(False)
@@ -1369,8 +1369,6 @@ cdef cummin_max(groupby_t[:, ::1] out,
                 ndarray[groupby_t, ndim=2] values,
                 const intp_t[:] labels,
                 groupby_t[:, ::1] accum,
-                Py_ssize_t N,
-                Py_ssize_t K,
                 bint is_datetimelike,
                 bint compute_max):
     """
@@ -1378,10 +1376,11 @@ cdef cummin_max(groupby_t[:, ::1] out,
     `labels`.
     """
     cdef:
-        Py_ssize_t i, j
+        Py_ssize_t i, j, N, K
         groupby_t val, mval
         intp_t lab
 
+    N, K = (<object>values).shape
     with nogil:
         for i in range(N):
             lab = labels[i]
@@ -1409,18 +1408,17 @@ cdef masked_cummin_max(groupby_t[:, ::1] out,
                        uint8_t[:, ::1] mask,
                        const intp_t[:] labels,
                        groupby_t[:, ::1] accum,
-                       Py_ssize_t N,
-                       Py_ssize_t K,
                        bint compute_max):
     """
     Compute the cumulative minimum/maximum of columns of `values`, in row groups
     `labels` with a masked algorithm.
     """
     cdef:
-        Py_ssize_t i, j
+        Py_ssize_t i, j, N, K
         groupby_t val, mval
         intp_t lab
 
+    N, K = (<object>values).shape
     with nogil:
         for i in range(N):
             lab = labels[i]

From 51f8f9c8ccd0def2c0a4f93dc398a89ab94e986e Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Mon, 7 Jun 2021 11:00:19 -0400
Subject: [PATCH 04/11] Remove unused

---
 pandas/_libs/groupby.pyx | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index ee57ff5c2a205..0e0598c3264e8 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -1345,11 +1345,9 @@ cdef group_cummin_max(groupby_t[:, ::1] out,
     This method modifies the `out` parameter, rather than returning an object.
     """
     cdef:
-        Py_ssize_t N, K
         groupby_t[:, ::1] accum
 
-    N, K = (<object>values).shape
-    accum = np.empty((ngroups, K), dtype=values.dtype)
+    accum = np.empty((ngroups, (<object>values).shape[1]), dtype=values.dtype)
     if groupby_t is int64_t:
         accum[:] = -_int64_max if compute_max else _int64_max
     elif groupby_t is uint64_t:

From 21e515758eeb591643ea704aec861077e7ade469 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Mon, 7 Jun 2021 11:24:23 -0400
Subject: [PATCH 05/11] Add multigroup tests

---
 pandas/tests/groupby/test_function.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index b79eb15c1e835..478d903ec791c 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -906,11 +906,19 @@ def test_cummax(dtypes_for_minmax):
 
 @pytest.mark.parametrize("method", ["cummin", "cummax"])
 @pytest.mark.parametrize("dtype", ["float", "Int64", "Float64"])
-def test_cummin_max_skipna(method, dtype):
+@pytest.mark.parametrize(
+    "groups,expected_data",
+    [
+        ([1, 1, 1], [1, None, None]),
+        ([1, 2, 3], [1, None, 2]),
+        ([1, 3, 3], [1, None, None]),
+    ],
+)
+def test_cummin_max_skipna(method, dtype, groups, expected_data):
     # GH-34047
-    df = DataFrame({"a": Series([1, None, 1], dtype=dtype)})
-    result = getattr(df.groupby([1, 1, 1])["a"], method)(skipna=False)
-    expected = Series([1, None, None], dtype=dtype, name="a")
+    df = DataFrame({"a": Series([1, None, 2], dtype=dtype)})
+    result = getattr(df.groupby(groups)["a"], method)(skipna=False)
+    expected = Series(expected_data, dtype=dtype, name="a")
     tm.assert_series_equal(result, expected)
 
 

From 7858fc657986f80ca84538261664c04e69adc55e Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Mon, 7 Jun 2021 11:34:40 -0400
Subject: [PATCH 06/11] Cleaner kwargs handling

---
 pandas/core/groupby/groupby.py | 22 ++++++----------------
 1 file changed, 6 insertions(+), 16 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 61b16392c43b5..eca4915c49732 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -2729,7 +2729,7 @@ def cumsum(self, axis=0, *args, **kwargs):
     @final
     @Substitution(name="groupby")
     @Appender(_common_see_also)
-    def cummin(self, axis=0, *args, **kwargs):
+    def cummin(self, axis=0, **kwargs):
         """
         Cumulative min for each group.
 
@@ -2737,21 +2737,16 @@ def cummin(self, axis=0, *args, **kwargs):
         -------
         Series or DataFrame
         """
-        # See groupby/test_function::test_cummin_cummax, current behavior allows
-        # but ignores numeric_only
-        if "numeric_only" in kwargs:
-            kwargs = kwargs.copy()
-            del kwargs["numeric_only"]
-        nv.validate_groupby_func("cummin", args, kwargs, ["skipna"])
+        skipna = kwargs.get("skipna", True)
         if axis != 0:
             return self.apply(lambda x: np.minimum.accumulate(x, axis))
 
-        return self._cython_transform("cummin", numeric_only=False, **kwargs)
+        return self._cython_transform("cummin", numeric_only=False, skipna=skipna)
 
     @final
     @Substitution(name="groupby")
     @Appender(_common_see_also)
-    def cummax(self, axis=0, *args, **kwargs):
+    def cummax(self, axis=0, **kwargs):
         """
         Cumulative max for each group.
 
@@ -2759,16 +2754,11 @@ def cummax(self, axis=0, *args, **kwargs):
         -------
         Series or DataFrame
         """
-        # See groupby/test_function::test_cummin_cummax, current behavior allows
-        # but ignores numeric_only
-        if "numeric_only" in kwargs:
-            kwargs = kwargs.copy()
-            del kwargs["numeric_only"]
-        nv.validate_groupby_func("cummax", args, kwargs, ["skipna"])
+        skipna = kwargs.get("skipna", True)
         if axis != 0:
             return self.apply(lambda x: np.maximum.accumulate(x, axis))
 
-        return self._cython_transform("cummax", numeric_only=False, **kwargs)
+        return self._cython_transform("cummax", numeric_only=False, skipna=skipna)
 
     @final
     def _get_cythonized_result(

From 3c869ce48863d43ee1c6bccfca5ee46fae791b7b Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Mon, 7 Jun 2021 11:51:44 -0400
Subject: [PATCH 07/11] Fix build warning

---
 pandas/_libs/groupby.pyx | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 60e4c1dbc6a40..a25069f4bb0fd 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -1388,6 +1388,9 @@ cdef cummin_max(groupby_t[:, ::1] out,
         na_val = NaN
     elif is_datetimelike:
         na_val = NPY_NAT
+    # Will never be used, just to avoid uninitialized warning
+    else:
+        na_val = 0
 
     N, K = (<object>values).shape
     seen_na = np.zeros((<object>accum).shape[0], dtype=np.uint8)

From c448837197f43b58a36b840e54775f7409da4c4a Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Sat, 12 Jun 2021 13:45:18 -0400
Subject: [PATCH 08/11] Fixup merge conflict and move whatsnew

---
 doc/source/whatsnew/v1.3.0.rst |  1 -
 pandas/_libs/groupby.pyx       | 11 +++++------
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 3e63054e6a2cf..dd95f9088e3da 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -268,7 +268,6 @@ Other enhancements
 - :meth:`.GroupBy.any` and :meth:`.GroupBy.all` use Kleene logic with nullable data types (:issue:`37506`)
 - :meth:`.GroupBy.any` and :meth:`.GroupBy.all` return a ``BooleanDtype`` for columns with nullable data types (:issue:`33449`)
 - :meth:`.GroupBy.rank` now supports object-dtype data (:issue:`38278`)
-- :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` now support the argument ``skipna`` (:issue:`34047`)
 - Constructing a :class:`DataFrame` or :class:`Series` with the ``data`` argument being a Python iterable that is *not* a NumPy ``ndarray`` consisting of NumPy scalars will now result in a dtype with a precision the maximum of the NumPy scalars; this was already the case when ``data`` is a NumPy ``ndarray`` (:issue:`40908`)
 - Add keyword ``sort`` to :func:`pivot_table` to allow non-sorting of the result (:issue:`39143`)
 - Add keyword ``dropna`` to :meth:`DataFrame.value_counts` to allow counting rows that include ``NA`` values (:issue:`41325`)
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 32dbac106c4ba..c8c02ba3c605c 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -1339,7 +1339,6 @@ cdef group_cummin_max(groupby_t[:, ::1] out,
         True if `values` contains datetime-like entries.
     skipna : bool
         If True, ignore nans in `values`.
-
     compute_max : bool
         True if cumulative maximum should be computed, False
         if cumulative minimum should be computed
@@ -1359,14 +1358,14 @@ cdef group_cummin_max(groupby_t[:, ::1] out,
     else:
         accum[:] = -np.inf if compute_max else np.inf
 
-    if mask is not None:
+    if mask is None:
         cummin_max(out, values, labels, accum, skipna, is_datetimelike, compute_max)
     else:
         masked_cummin_max(out, values, mask, labels, accum, skipna, compute_max)
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
+# @cython.boundscheck(False)
+# @cython.wraparound(False)
 cdef cummin_max(groupby_t[:, ::1] out,
                 ndarray[groupby_t, ndim=2] values,
                 const intp_t[:] labels,
@@ -1420,8 +1419,8 @@ cdef cummin_max(groupby_t[:, ::1] out,
                         out[i, j] = val
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
+# @cython.boundscheck(False)
+# @cython.wraparound(False)
 cdef masked_cummin_max(groupby_t[:, ::1] out,
                        ndarray[groupby_t, ndim=2] values,
                        uint8_t[:, ::1] mask,

From 9333d5831de27cafe84615c19bba0d4fc3ade9a2 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Sat, 12 Jun 2021 13:50:30 -0400
Subject: [PATCH 09/11] Shrink diff

---
 doc/source/whatsnew/v1.4.0.rst |  2 +-
 pandas/_libs/groupby.pyx       | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 166ea2f0d4164..942f69e1ea4bb 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -29,7 +29,7 @@ enhancement2
 
 Other enhancements
 ^^^^^^^^^^^^^^^^^^
--
+- :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` now support the argument ``skipna`` (:issue:`34047`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index c8c02ba3c605c..f916a5c8b755a 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -1358,14 +1358,14 @@ cdef group_cummin_max(groupby_t[:, ::1] out,
     else:
         accum[:] = -np.inf if compute_max else np.inf
 
-    if mask is None:
-        cummin_max(out, values, labels, accum, skipna, is_datetimelike, compute_max)
-    else:
+    if mask is not None:
         masked_cummin_max(out, values, mask, labels, accum, skipna, compute_max)
+    else:
+        cummin_max(out, values, labels, accum, skipna, is_datetimelike, compute_max)
 
 
-# @cython.boundscheck(False)
-# @cython.wraparound(False)
+@cython.boundscheck(False)
+@cython.wraparound(False)
 cdef cummin_max(groupby_t[:, ::1] out,
                 ndarray[groupby_t, ndim=2] values,
                 const intp_t[:] labels,
@@ -1419,8 +1419,8 @@ cdef cummin_max(groupby_t[:, ::1] out,
                         out[i, j] = val
 
 
-# @cython.boundscheck(False)
-# @cython.wraparound(False)
+@cython.boundscheck(False)
+@cython.wraparound(False)
 cdef masked_cummin_max(groupby_t[:, ::1] out,
                        ndarray[groupby_t, ndim=2] values,
                        uint8_t[:, ::1] mask,

From dfe8b73e842f87ec3fa3fe561a5e6c3d2f3dec49 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Fri, 25 Jun 2021 21:29:04 -0400
Subject: [PATCH 10/11] Clean up test

---
 pandas/tests/groupby/test_function.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index fb658b0a2cadb..161e4e9cd67f4 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -816,8 +816,11 @@ def test_cummax(dtypes_for_minmax):
 def test_cummin_max_skipna(method, dtype, groups, expected_data):
     # GH-34047
     df = DataFrame({"a": Series([1, None, 2], dtype=dtype)})
-    result = getattr(df.groupby(groups)["a"], method)(skipna=False)
+    gb = df.groupby(groups)["a"]
+
+    result = getattr(gb, method)(skipna=False)
     expected = Series(expected_data, dtype=dtype, name="a")
+
     tm.assert_series_equal(result, expected)
 
 

From 5ccdf94c90b9fe3dced13cb797d3c602cc1ce71a Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Sat, 24 Jul 2021 20:38:28 -0400
Subject: [PATCH 11/11] Avoid seen na alloc, fix multicol case

---
 pandas/_libs/groupby.pyx              | 22 +++++++++++++---------
 pandas/tests/groupby/test_function.py | 12 ++++++++++++
 2 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index f916a5c8b755a..91921ba0e64c2 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -1380,19 +1380,23 @@ cdef cummin_max(groupby_t[:, ::1] out,
     cdef:
         Py_ssize_t i, j, N, K
         groupby_t val, mval, na_val
-        uint8_t[::1] seen_na
+        uint8_t[:, ::1] seen_na
         intp_t lab
+        bint na_possible
 
     if groupby_t is float64_t or groupby_t is float32_t:
         na_val = NaN
+        na_possible = True
     elif is_datetimelike:
         na_val = NPY_NAT
+        na_possible = True
     # Will never be used, just to avoid uninitialized warning
     else:
         na_val = 0
+        na_possible = False
 
-    N, K = (<object>values).shape
-    seen_na = np.zeros((<object>accum).shape[0], dtype=np.uint8)
+    if na_possible:
+        seen_na = np.zeros((<object>accum).shape, dtype=np.uint8)
 
     N, K = (<object>values).shape
     with nogil:
@@ -1401,7 +1405,7 @@ cdef cummin_max(groupby_t[:, ::1] out,
             if lab < 0:
                 continue
             for j in range(K):
-                if not skipna and seen_na[lab]:
+                if not skipna and na_possible and seen_na[lab, j]:
                     out[i, j] = na_val
                 else:
                     val = values[i, j]
@@ -1415,7 +1419,7 @@ cdef cummin_max(groupby_t[:, ::1] out,
                                 accum[lab, j] = mval = val
                         out[i, j] = mval
                     else:
-                        seen_na[lab] = 1
+                        seen_na[lab, j] = 1
                         out[i, j] = val
 
 
@@ -1435,18 +1439,18 @@ cdef masked_cummin_max(groupby_t[:, ::1] out,
     cdef:
         Py_ssize_t i, j, N, K
         groupby_t val, mval
-        uint8_t[::1] seen_na
+        uint8_t[:, ::1] seen_na
         intp_t lab
 
     N, K = (<object>values).shape
-    seen_na = np.zeros((<object>accum).shape[0], dtype=np.uint8)
+    seen_na = np.zeros((<object>accum).shape, dtype=np.uint8)
     with nogil:
         for i in range(N):
             lab = labels[i]
             if lab < 0:
                 continue
             for j in range(K):
-                if not skipna and seen_na[lab]:
+                if not skipna and seen_na[lab, j]:
                     mask[i, j] = 1
                 else:
                     if not mask[i, j]:
@@ -1460,7 +1464,7 @@ cdef masked_cummin_max(groupby_t[:, ::1] out,
                                 accum[lab, j] = mval = val
                         out[i, j] = mval
                     else:
-                        seen_na[lab] = 1
+                        seen_na[lab, j] = 1
 
 
 @cython.boundscheck(False)
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 161e4e9cd67f4..77e5e9ba133f5 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -824,6 +824,18 @@ def test_cummin_max_skipna(method, dtype, groups, expected_data):
     tm.assert_series_equal(result, expected)
 
 
+@pytest.mark.parametrize("method", ["cummin", "cummax"])
+def test_cummin_max_skipna_multiple_cols(method):
+    # Ensure missing value in "a" doesn't cause "b" to be nan-filled
+    df = DataFrame({"a": [np.nan, 2.0, 2.0], "b": [2.0, 2.0, 2.0]})
+    gb = df.groupby([1, 1, 1])[["a", "b"]]
+
+    result = getattr(gb, method)(skipna=False)
+    expected = DataFrame({"a": [np.nan, np.nan, np.nan], "b": [2.0, 2.0, 2.0]})
+
+    tm.assert_frame_equal(result, expected)
+
+
 @td.skip_if_32bit
 @pytest.mark.parametrize("method", ["cummin", "cummax"])
 @pytest.mark.parametrize(