From 3dfef1a4a7569088e0adc419a9cf6d012e4f6bc7 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Fri, 25 Oct 2019 19:28:18 -0700
Subject: [PATCH 1/5] WIP: libgroupby getattr pattern

---
 pandas/_libs/groupby.pyx   |  4 ++--
 pandas/core/groupby/ops.py | 25 +++++++------------------
 2 files changed, 9 insertions(+), 20 deletions(-)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index b2ffbb3ecb4f2..e613889ce440e 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -1019,8 +1019,8 @@ def group_nth(rank_t[:, :] out,
 def group_rank(float64_t[:, :] out,
                rank_t[:, :] values,
                const int64_t[:] labels,
-               bint is_datetimelike, object ties_method,
-               bint ascending, bint pct, object na_option):
+               bint is_datetimelike, object ties_method="average",
+               bint ascending=True, bint pct=False, object na_option="keep"):
     """
     Provides the rank of values within each group.

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 79b51ef57cd37..3c3ff588172a2 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -372,7 +372,7 @@ def get_group_levels(self):
             "min": "group_min",
             "max": "group_max",
             "mean": "group_mean",
-            "median": {"name": "group_median"},
+            "median": "group_median",
             "var": "group_var",
             "first": {
                 "name": "group_nth",
@@ -386,19 +386,7 @@ def get_group_levels(self):
             "cumsum": "group_cumsum",
             "cummin": "group_cummin",
             "cummax": "group_cummax",
-            "rank": {
-                "name": "group_rank",
-                "f": lambda func, a, b, c, d, e, **kwargs: func(
-                    a,
-                    b,
-                    c,
-                    e,
-                    kwargs.get("ties_method", "average"),
-                    kwargs.get("ascending", True),
-                    kwargs.get("pct", False),
-                    kwargs.get("na_option", "keep"),
-                ),
-            },
+            "rank": "group_rank",
         },
     }

@@ -445,6 +433,7 @@ def get_func(fname):
         ftype = self._cython_functions[kind][how]

         if isinstance(ftype, dict):
+            # we only get here with (kind, how) == ("aggregate", "first")
             func = afunc = get_func(ftype["name"])

             # a sub-function
@@ -575,7 +564,6 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs):
                 values,
                 labels,
                 func,
-                is_numeric,
                 is_datetimelike,
                 min_count,
             )
@@ -586,7 +574,7 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs):

             # TODO: min_count
             result = self._transform(
-                result, values, labels, func, is_numeric, is_datetimelike, **kwargs
+                result, values, labels, func, is_datetimelike, **kwargs
             )

         if is_integer_dtype(result) and not is_datetimelike:
@@ -633,7 +621,6 @@ def _aggregate(
         values,
         comp_ids,
         agg_func,
-        is_numeric,
         is_datetimelike,
         min_count=-1,
     ):
@@ -651,7 +638,6 @@ def _transform(
         values,
         comp_ids,
         transform_func,
-        is_numeric,
         is_datetimelike,
         **kwargs
     ):
@@ -660,6 +646,9 @@ def _transform(
         if values.ndim > 2:
             # punting for now
             raise NotImplementedError("number of dimensions is currently limited to 2")
+        elif transform_func is libgroupby.group_rank:
+            # different signature from the others
+            transform_func(result, values, comp_ids, is_datetimelike=is_datetimelike, **kwargs)
         else:
             transform_func(result, values, comp_ids, ngroups, is_datetimelike, **kwargs)
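For orientation between patches: the series replaces the {"name": ..., "f": lambda ...} entries in _cython_functions with plain kernel names resolved via getattr, while keyword defaults on the Cython signatures take over the job the curried lambdas used to do. Below is a minimal, self-contained Python sketch of that lookup pattern; the stub module, kernel bodies, and helper names are illustrative stand-ins, not the real pandas/libgroupby code.

import types

# Stand-in kernels for the compiled libgroupby module. The keyword defaults
# on group_rank mirror the defaults patch 1 adds to the Cython signature.
def group_add(out, values, labels):
    # toy kernel: records a sum instead of writing into an output array
    out["sum"] = sum(values)

def group_rank(out, values, labels, ties_method="average", ascending=True,
               pct=False, na_option="keep"):
    # toy kernel: records which options it was called with
    out["rank_options"] = (ties_method, ascending, pct, na_option)

libgroupby_stub = types.SimpleNamespace(group_add=group_add, group_rank=group_rank)

# how -> kernel name, plain strings only (no dict-plus-lambda entries).
_cython_functions = {
    "aggregate": {"add": "group_add"},
    "transform": {"rank": "group_rank"},
}

def get_cython_func(kind, how):
    # the getattr lookup named in the commit subject
    name = _cython_functions[kind][how]
    func = getattr(libgroupby_stub, name, None)
    if func is None:
        raise NotImplementedError(f"{how} is not supported for kind {kind!r}")
    return func

out = {}
get_cython_func("transform", "rank")(out, [3.0, 1.0, 2.0], [0, 0, 0], ascending=False)
print(out)  # {'rank_options': ('average', False, False, 'keep')}

Callers that need non-default options pass them as keywords, which is what lets the per-entry lambda wrappers be deleted.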
"group_min", "max": "group_max", "mean": "group_mean", - "median": "group_median", + "median": {"name": "group_median"}, "var": "group_var", "first": { "name": "group_nth", @@ -433,7 +433,8 @@ def get_func(fname): ftype = self._cython_functions[kind][how] if isinstance(ftype, dict): - # we only get here with (kind, how) == ("aggregate", "first") + # we only get here with kind == "aggregate" and + # how == "first" or "median" func = afunc = get_func(ftype["name"]) # a sub-function From 2e0e76a5bb20205130ae096fc5c9fa81abc4abb3 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 27 Oct 2019 11:07:38 -0700 Subject: [PATCH 3/5] blackify --- pandas/core/groupby/ops.py | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 7198ddac805eb..72b97ee761e23 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -560,13 +560,7 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs): ) counts = np.zeros(self.ngroups, dtype=np.int64) result = self._aggregate( - result, - counts, - values, - labels, - func, - is_datetimelike, - min_count, + result, counts, values, labels, func, is_datetimelike, min_count ) elif kind == "transform": result = _maybe_fill( @@ -616,14 +610,7 @@ def transform(self, values, how, axis=0, **kwargs): return self._cython_operation("transform", values, how, axis, **kwargs) def _aggregate( - self, - result, - counts, - values, - comp_ids, - agg_func, - is_datetimelike, - min_count=-1, + self, result, counts, values, comp_ids, agg_func, is_datetimelike, min_count=-1 ): if values.ndim > 2: # punting for now @@ -634,13 +621,7 @@ def _aggregate( return result def _transform( - self, - result, - values, - comp_ids, - transform_func, - is_datetimelike, - **kwargs + self, result, values, comp_ids, transform_func, is_datetimelike, **kwargs ): comp_ids, _, ngroups = self.group_info @@ -649,7 +630,9 @@ def _transform( raise NotImplementedError("number of dimensions is currently limited to 2") elif transform_func is libgroupby.group_rank: # different signature from the others - transform_func(result, values, comp_ids, is_datetimelike=is_datetimelike, **kwargs) + transform_func( + result, values, comp_ids, is_datetimelike=is_datetimelike, **kwargs + ) else: transform_func(result, values, comp_ids, ngroups, is_datetimelike, **kwargs) From ce253d8e62f92a31f127584bb75b057770af595c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 29 Oct 2019 20:03:10 -0700 Subject: [PATCH 4/5] REF: remove libgroupby dict cases --- pandas/_libs/groupby.pyx | 2 +- pandas/core/groupby/ops.py | 29 +++++++---------------------- 2 files changed, 8 insertions(+), 23 deletions(-) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 1261d7392f4d5..217010b85c8f9 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -931,7 +931,7 @@ def group_last(rank_t[:, :] out, def group_nth(rank_t[:, :] out, int64_t[:] counts, rank_t[:, :] values, - const int64_t[:] labels, int64_t rank, + const int64_t[:] labels, int64_t rank=1, Py_ssize_t min_count=-1): """ Only aggregates on axis=0 diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 81fbb81f3f423..4dd30a3a5c55e 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -319,12 +319,9 @@ def get_group_levels(self): "min": "group_min", "max": "group_max", "mean": "group_mean", - "median": {"name": "group_median"}, + "median": "group_median", "var": 
"group_var", - "first": { - "name": "group_nth", - "f": lambda func, a, b, c, d, e: func(a, b, c, d, 1, -1), - }, + "first": "group_nth", "last": "group_last", "ohlc": "group_ohlc", }, @@ -379,23 +376,7 @@ def get_func(fname): ftype = self._cython_functions[kind][how] - if isinstance(ftype, dict): - # we only get here with kind == "aggregate" and - # how == "first" or "median" - func = afunc = get_func(ftype["name"]) - - # a sub-function - f = ftype.get("f") - if f is not None: - - def wrapper(*args, **kwargs): - return f(afunc, *args, **kwargs) - - # need to curry our sub-function - func = wrapper - - else: - func = get_func(ftype) + func = get_func(ftype) if func is None: raise NotImplementedError( @@ -562,6 +543,10 @@ def _aggregate( if values.ndim > 2: # punting for now raise NotImplementedError("number of dimensions is currently limited to 2") + elif agg_func is libgroupby.group_nth: + # different signature from the others + # TODO: should we be using min_count instead of hard-coding it? + agg_func(result, counts, values, comp_ids, rank=1, min_count=-1) else: agg_func(result, counts, values, comp_ids, min_count) From eed591d57906c23fe66b0863a55ebb49da0e6a09 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 30 Oct 2019 16:16:40 -0700 Subject: [PATCH 5/5] add dummy arg for group_rank --- pandas/_libs/groupby.pyx | 4 ++++ pandas/core/groupby/ops.py | 5 ----- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 217010b85c8f9..98be97def3dda 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -1022,6 +1022,7 @@ def group_nth(rank_t[:, :] out, def group_rank(float64_t[:, :] out, rank_t[:, :] values, const int64_t[:] labels, + int ngroups, bint is_datetimelike, object ties_method="average", bint ascending=True, bint pct=False, object na_option="keep"): """ @@ -1033,6 +1034,9 @@ def group_rank(float64_t[:, :] out, values : array of rank_t values to be ranked labels : array containing unique label for each group, with its ordering matching up to the corresponding record in `values` + ngroups : int + This parameter is not used, is needed to match signatures of other + groupby functions. is_datetimelike : bool, default False unused in this method but provided for call compatibility with other Cython transformations diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 4dd30a3a5c55e..2a7fd079679a4 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -560,11 +560,6 @@ def _transform( if values.ndim > 2: # punting for now raise NotImplementedError("number of dimensions is currently limited to 2") - elif transform_func is libgroupby.group_rank: - # different signature from the others - transform_func( - result, values, comp_ids, is_datetimelike=is_datetimelike, **kwargs - ) else: transform_func(result, values, comp_ids, ngroups, is_datetimelike, **kwargs)