From 6057696196ca38cae063f1dc0498fe05ccaeaf4a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 9 Oct 2019 19:46:57 -0700 Subject: [PATCH 1/5] REF: use fused types for group_last --- pandas/_libs/groupby_helper.pxi.in | 123 +++++++++++++++++++---------- 1 file changed, 80 insertions(+), 43 deletions(-) diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index 000689f634545..209f701969f6c 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -12,39 +12,27 @@ _int64_max = np.iinfo(np.int64).max # group_nth, group_last, group_rank # ---------------------------------------------------------------------- -{{py: - -# name, c_type, nan_val -dtypes = [('float64', 'float64_t', 'NAN'), - ('float32', 'float32_t', 'NAN'), - ('int64', 'int64_t', 'NPY_NAT'), - ('object', 'object', 'NAN')] - -def get_dispatch(dtypes): - - for name, c_type, nan_val in dtypes: - - yield name, c_type, nan_val -}} - - -{{for name, c_type, nan_val in get_dispatch(dtypes)}} +ctypedef fused rank_t: + float64_t + float32_t + int64_t + object @cython.wraparound(False) @cython.boundscheck(False) -def group_last_{{name}}({{c_type}}[:, :] out, - int64_t[:] counts, - {{c_type}}[:, :] values, - const int64_t[:] labels, - Py_ssize_t min_count=-1): +def group_last(rank_t[:, :] out, + int64_t[:] counts, + rank_t[:, :] values, + const int64_t[:] labels, + Py_ssize_t min_count=-1): """ Only aggregates on axis=0 """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - {{c_type}} val - ndarray[{{c_type}}, ndim=2] resx + rank_t val + ndarray[rank_t, ndim=2] resx ndarray[int64_t, ndim=2] nobs assert min_count == -1, "'min_count' only used in add and prod" @@ -53,19 +41,15 @@ def group_last_{{name}}({{c_type}}[:, :] out, raise AssertionError("len(index) != len(labels)") nobs = np.zeros((out).shape, dtype=np.int64) - {{if name == 'object'}} - resx = np.empty((out).shape, dtype=object) - {{else}} - resx = np.empty_like(out) - {{endif}} + if rank_t is object: + resx = np.empty((out).shape, dtype=object) + else: + resx = np.empty_like(out) N, K = (values).shape - {{if name == "object"}} - if True: # make templating happy - {{else}} - with nogil: - {{endif}} + if rank_t is object: + # TODO: De-duplicate once conditional-nogil is available for i in range(N): lab = labels[i] if lab < 0: @@ -76,20 +60,71 @@ def group_last_{{name}}({{c_type}}[:, :] out, val = values[i, j] # not nan - if ( - {{if not name.startswith("int")}} - val == val and - {{endif}} - val != {{nan_val}}): + if val == val: nobs[lab, j] += 1 resx[lab, j] = val for i in range(ncounts): for j in range(K): if nobs[i, j] == 0: - out[i, j] = {{nan_val}} + out[i, j] = NAN else: out[i, j] = resx[i, j] + else: + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + # not nan + if rank_t is int64_t: + # need a special notna check + if val != NPY_NAT: + nobs[lab, j] += 1 + resx[lab, j] = val + else: + if val == val: + nobs[lab, j] += 1 + resx[lab, j] = val + + for i in range(ncounts): + for j in range(K): + if nobs[i, j] == 0: + if rank_t is int64_t: + out[i, j] = NPY_NAT + else: + out[i, j] = NAN + else: + out[i, j] = resx[i, j] + +group_last_float64 = group_last["float64_t"] +group_last_float32 = group_last["float32_t"] +group_last_int64 = group_last["int64_t"] +group_last_object = group_last["object"] + + +{{py: + +# name, c_type, nan_val +dtypes = [('float64', 'float64_t', 'NAN'), + ('float32', 'float32_t', 'NAN'), + 
('int64', 'int64_t', 'NPY_NAT'), + ('object', 'object', 'NAN')] + +def get_dispatch(dtypes): + + for name, c_type, nan_val in dtypes: + + yield name, c_type, nan_val +}} + + +{{for name, c_type, nan_val in get_dispatch(dtypes)}} @cython.wraparound(False) @@ -484,7 +519,8 @@ def group_cummin(groupby_t[:, :] out, const int64_t[:] labels, int ngroups, bint is_datetimelike): - """Cumulative minimum of columns of `values`, in row groups `labels`. + """ + Cumulative minimum of columns of `values`, in row groups `labels`. Parameters ---------- @@ -548,9 +584,10 @@ def group_cummin(groupby_t[:, :] out, def group_cummax(groupby_t[:, :] out, groupby_t[:, :] values, const int64_t[:] labels, - int ngroups, + int ngroups, bint is_datetimelike): - """Cumulative maximum of columns of `values`, in row groups `labels`. + """ + Cumulative maximum of columns of `values`, in row groups `labels`. Parameters ---------- From dfcd5efa3726ffd17779b48b88a43b1d834ff518 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 9 Oct 2019 20:44:26 -0700 Subject: [PATCH 2/5] REF: use fused types for groupby_helper --- pandas/_libs/groupby_helper.pxi.in | 152 ++++++++++++++++------------- 1 file changed, 84 insertions(+), 68 deletions(-) diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index 209f701969f6c..03c4ae58f4dc5 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -108,39 +108,20 @@ group_last_int64 = group_last["int64_t"] group_last_object = group_last["object"] -{{py: - -# name, c_type, nan_val -dtypes = [('float64', 'float64_t', 'NAN'), - ('float32', 'float32_t', 'NAN'), - ('int64', 'int64_t', 'NPY_NAT'), - ('object', 'object', 'NAN')] - -def get_dispatch(dtypes): - - for name, c_type, nan_val in dtypes: - - yield name, c_type, nan_val -}} - - -{{for name, c_type, nan_val in get_dispatch(dtypes)}} - - @cython.wraparound(False) @cython.boundscheck(False) -def group_nth_{{name}}({{c_type}}[:, :] out, - int64_t[:] counts, - {{c_type}}[:, :] values, - const int64_t[:] labels, int64_t rank, - Py_ssize_t min_count=-1): +def group_nth(rank_t[:, :] out, + int64_t[:] counts, + rank_t[:, :] values, + const int64_t[:] labels, int64_t rank, + Py_ssize_t min_count=-1): """ Only aggregates on axis=0 """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - {{c_type}} val - ndarray[{{c_type}}, ndim=2] resx + rank_t val + ndarray[rank_t, ndim=2] resx ndarray[int64_t, ndim=2] nobs assert min_count == -1, "'min_count' only used in add and prod" @@ -149,19 +130,15 @@ def group_nth_{{name}}({{c_type}}[:, :] out, raise AssertionError("len(index) != len(labels)") nobs = np.zeros((out).shape, dtype=np.int64) - {{if name=='object'}} - resx = np.empty((out).shape, dtype=object) - {{else}} - resx = np.empty_like(out) - {{endif}} + if rank_t is object: + resx = np.empty((out).shape, dtype=object) + else: + resx = np.empty_like(out) N, K = (values).shape - {{if name == "object"}} - if True: # make templating happy - {{else}} - with nogil: - {{endif}} + if rank_t is object: + # TODO: De-duplicate once conditional-nogil is available for i in range(N): lab = labels[i] if lab < 0: @@ -172,11 +149,7 @@ def group_nth_{{name}}({{c_type}}[:, :] out, val = values[i, j] # not nan - if ( - {{if not name.startswith("int")}} - val == val and - {{endif}} - val != {{nan_val}}): + if val == val: nobs[lab, j] += 1 if nobs[lab, j] == rank: resx[lab, j] = val @@ -184,28 +157,65 @@ def group_nth_{{name}}({{c_type}}[:, :] out, for i in range(ncounts): for j in range(K): if nobs[i, 
j] == 0: - out[i, j] = {{nan_val}} + out[i, j] = NAN else: out[i, j] = resx[i, j] + else: + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue -{{if name != 'object'}} + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + # not nan + if rank_t is int64_t: + # need a special notna check + if val != NPY_NAT: + nobs[lab, j] += 1 + if nobs[lab, j] == rank: + resx[lab, j] = val + else: + if val == val: + nobs[lab, j] += 1 + if nobs[lab, j] == rank: + resx[lab, j] = val + + for i in range(ncounts): + for j in range(K): + if nobs[i, j] == 0: + if rank_t is int64_t: + out[i, j] = NPY_NAT + else: + out[i, j] = NAN + else: + out[i, j] = resx[i, j] + + +group_nth_float64 = group_nth["float64_t"] +group_nth_float32 = group_nth["float32_t"] +group_nth_int64 = group_nth["int64_t"] +group_nth_object = group_nth["object"] @cython.boundscheck(False) @cython.wraparound(False) -def group_rank_{{name}}(float64_t[:, :] out, - {{c_type}}[:, :] values, - const int64_t[:] labels, - bint is_datetimelike, object ties_method, - bint ascending, bint pct, object na_option): +def group_rank(float64_t[:, :] out, + rank_t[:, :] values, + const int64_t[:] labels, + bint is_datetimelike, object ties_method, + bint ascending, bint pct, object na_option): """ Provides the rank of values within each group. Parameters ---------- out : array of float64_t values which this method will write its results to - values : array of {{c_type}} values to be ranked + values : array of rank_t values to be ranked labels : array containing unique label for each group, with its ordering matching up to the corresponding record in `values` is_datetimelike : bool, default False @@ -238,10 +248,13 @@ def group_rank_{{name}}(float64_t[:, :] out, Py_ssize_t grp_vals_seen=1, grp_na_count=0, grp_tie_count=0 ndarray[int64_t] _as ndarray[float64_t, ndim=2] grp_sizes - ndarray[{{c_type}}] masked_vals + ndarray[rank_t] masked_vals ndarray[uint8_t] mask bint keep_na - {{c_type}} nan_fill_val + rank_t nan_fill_val + + if rank_t is object: + raise NotImplementedError("Cant do nogil") tiebreak = tiebreakers[ties_method] keep_na = na_option == 'keep' @@ -252,25 +265,23 @@ def group_rank_{{name}}(float64_t[:, :] out, # with mask, without obfuscating location of missing data # in values array masked_vals = np.array(values[:, 0], copy=True) - {{if name == 'int64'}} - mask = (masked_vals == {{nan_val}}).astype(np.uint8) - {{else}} - mask = np.isnan(masked_vals).astype(np.uint8) - {{endif}} + if rank_t is int64_t: + mask = (masked_vals == NPY_NAT).astype(np.uint8) + else: + mask = np.isnan(masked_vals).astype(np.uint8) if ascending ^ (na_option == 'top'): - {{if name == 'int64'}} - nan_fill_val = np.iinfo(np.int64).max - {{else}} - nan_fill_val = np.inf - {{endif}} + if rank_t is int64_t: + nan_fill_val = np.iinfo(np.int64).max + else: + nan_fill_val = np.inf order = (masked_vals, mask, labels) else: - {{if name == 'int64'}} - nan_fill_val = np.iinfo(np.int64).min - {{else}} - nan_fill_val = -np.inf - {{endif}} + if rank_t is int64_t: + nan_fill_val = np.iinfo(np.int64).min + else: + nan_fill_val = -np.inf + order = (masked_vals, ~mask, labels) np.putmask(masked_vals, mask, nan_fill_val) @@ -372,8 +383,13 @@ def group_rank_{{name}}(float64_t[:, :] out, out[i, 0] = NAN elif grp_sizes[i, 0] != 0: out[i, 0] = out[i, 0] / grp_sizes[i, 0] -{{endif}} -{{endfor}} + + +group_rank_float64 = group_rank["float64_t"] +group_rank_float32 = group_rank["float32_t"] +group_rank_int64 = group_rank["int64_t"] +# Note: we do not have a 
group_rank_object because that would require a +# not-nogil implementation. # ---------------------------------------------------------------------- From 12c728819a8510592ee79df01f7cf3227685521b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 10 Oct 2019 06:26:50 -0700 Subject: [PATCH 3/5] fix whitesapce --- pandas/_libs/groupby_helper.pxi.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index 03c4ae58f4dc5..d93a65ab96484 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -600,7 +600,7 @@ def group_cummin(groupby_t[:, :] out, def group_cummax(groupby_t[:, :] out, groupby_t[:, :] values, const int64_t[:] labels, - int ngroups, + int ngroups, bint is_datetimelike): """ Cumulative maximum of columns of `values`, in row groups `labels`. From 42ff41458c8be6848ff56827193fd490ba116886 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 10 Oct 2019 13:21:02 -0700 Subject: [PATCH 4/5] suggested edit --- pandas/_libs/groupby_helper.pxi.in | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index d93a65ab96484..3ce8c742c58f7 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -60,14 +60,23 @@ def group_last(rank_t[:, :] out, val = values[i, j] # not nan - if val == val: - nobs[lab, j] += 1 - resx[lab, j] = val + if rank_t is int64_t: + # need a special notna check + if val != NPY_NAT: + nobs[lab, j] += 1 + resx[lab, j] = val + else: + if val == val: + nobs[lab, j] += 1 + resx[lab, j] = val for i in range(ncounts): for j in range(K): if nobs[i, j] == 0: - out[i, j] = NAN + if rank_t is int64_t: + out[i, j] = NPY_NAT + else: + out[i, j] = NAN else: out[i, j] = resx[i, j] else: From 76b20c6fe3ede2c3ee398888e587735698a6efb0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 10 Oct 2019 13:21:25 -0700 Subject: [PATCH 5/5] comment --- pandas/_libs/groupby_helper.pxi.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index 3ce8c742c58f7..6b434b6470581 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -398,7 +398,7 @@ group_rank_float64 = group_rank["float64_t"] group_rank_float32 = group_rank["float32_t"] group_rank_int64 = group_rank["int64_t"] # Note: we do not have a group_rank_object because that would require a -# not-nogil implementation. +# not-nogil implementation, see GH#19560 # ----------------------------------------------------------------------
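
The pattern this series relies on, sketched outside the pandas sources: a single def over a Cython fused type compiles to one specialization per member dtype, compile-time "if rank_t is ...:" checks replace the old Tempita "{{if ...}}" blocks, and the per-dtype entry points the rest of the codebase expects are recovered by indexing the fused function, exactly as group_last_float64 = group_last["float64_t"] does above. The snippet below is a minimal, hypothetical illustration only; number_t, last_valid and the derived names are invented for this sketch and are not part of pandas.

cimport cython

import numpy as np
from numpy cimport float64_t, int64_t

# Same sentinel pandas uses for missing int64 (datetime-like) values.
cdef int64_t NPY_NAT = np.iinfo(np.int64).min

ctypedef fused number_t:
    float64_t
    int64_t
    object

@cython.wraparound(False)
@cython.boundscheck(False)
def last_valid(number_t[:] values):
    """Return the last non-missing element, or None if all are missing."""
    cdef:
        Py_ssize_t i, N = values.shape[0]
        number_t val

    for i in range(N - 1, -1, -1):
        val = values[i]
        if number_t is int64_t:
            # need a special notna check, as in group_last above
            if val != NPY_NAT:
                return val
        else:
            # NaN != NaN, so this comparison skips missing values
            if val == val:
                return val
    return None

# Per-dtype entry points recovered by indexing the fused function,
# mirroring group_last_float64 = group_last["float64_t"] above.
last_valid_float64 = last_valid["float64_t"]
last_valid_int64 = last_valid["int64_t"]
last_valid_object = last_valid["object"]

As in the patches, the object specialization cannot release the GIL, so anything that needs nogil either keeps a duplicated with-GIL branch (the TODO about conditional-nogil in group_last/group_nth) or simply omits the object specialization, as group_rank does.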