From 98b53d78124951087c8a1fa9a00a70748273ab19 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Wed, 6 Nov 2019 20:41:55 -0800
Subject: [PATCH 1/8] Annotate groupby.ops

---
 pandas/core/groupby/ops.py | 33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 2c8aa1294451d..16a32c30a9857 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -106,7 +106,7 @@ def __iter__(self):
     def nkeys(self) -> int:
         return len(self.groupings)
 
-    def get_iterator(self, data, axis=0):
+    def get_iterator(self, data: NDFrame, axis: int = 0):
         """
         Groupby iterator
 
@@ -120,7 +120,7 @@ def get_iterator(self, data, axis=0):
         for key, (i, group) in zip(keys, splitter):
             yield key, group
 
-    def _get_splitter(self, data, axis=0):
+    def _get_splitter(self, data: NDFrame, axis: int = 0) -> "DataSplitter":
         comp_ids, _, ngroups = self.group_info
         return get_splitter(data, comp_ids, ngroups, axis=axis)
 
@@ -142,7 +142,7 @@ def _get_group_keys(self):
             # provide "flattened" iterator for multi-group setting
             return get_flattened_iterator(comp_ids, ngroups, self.levels, self.codes)
 
-    def apply(self, f, data, axis: int = 0):
+    def apply(self, f, data: NDFrame, axis: int = 0):
         mutated = self.mutated
         splitter = self._get_splitter(data, axis=axis)
         group_keys = self._get_group_keys()
@@ -157,7 +157,7 @@ def apply(self, f, data, axis: int = 0):
 
         elif (
             com.get_callable_name(f) not in base.plotting_methods
-            and hasattr(splitter, "fast_apply")
+            and splitter.fast_apply is not None
             and axis == 0
             # with MultiIndex, apply_frame_axis0 would raise InvalidApply
             # TODO: can we make this check prettier?
@@ -292,7 +292,7 @@ def recons_codes(self):
         return decons_obs_group_ids(comp_ids, obs_ids, self.shape, codes, xnull=True)
 
     @cache_readonly
-    def result_index(self):
+    def result_index(self) -> Index:
         if not self.compressed and len(self.groupings) == 1:
             return self.groupings[0].result_index.rename(self.names[0])
 
@@ -607,7 +607,7 @@ def agg_series(self, obj: Series, func):
                 raise
         return self._aggregate_series_pure_python(obj, func)
 
-    def _aggregate_series_fast(self, obj, func):
+    def _aggregate_series_fast(self, obj: Series, func):
         # At this point we have already checked that obj.index is not a MultiIndex
         #  and that obj is backed by an ndarray, not ExtensionArray
         func = self._is_builtin_func(func)
@@ -623,7 +623,7 @@ def _aggregate_series_fast(self, obj, func):
         result, counts = grouper.get_result()
         return result, counts
 
-    def _aggregate_series_pure_python(self, obj, func):
+    def _aggregate_series_pure_python(self, obj: Series, func):
 
         group_index, _, ngroups = self.group_info
 
@@ -682,7 +682,12 @@ class BinGrouper(BaseGrouper):
     """
 
     def __init__(
-        self, bins, binlabels, filter_empty=False, mutated=False, indexer=None
+        self,
+        bins,
+        binlabels,
+        filter_empty: bool = False,
+        mutated: bool = False,
+        indexer=None,
     ):
         self.bins = ensure_int64(bins)
         self.binlabels = ensure_index(binlabels)
@@ -825,7 +830,9 @@ def _is_indexed_like(obj, axes) -> bool:
 
 
 class DataSplitter:
-    def __init__(self, data, labels, ngroups, axis: int = 0):
+    fast_apply = None
+
+    def __init__(self, data: NDFrame, labels, ngroups: int, axis: int = 0):
         self.data = data
         self.labels = ensure_int64(labels)
         self.ngroups = ngroups
@@ -856,7 +863,7 @@ def __iter__(self):
         for i, (start, end) in enumerate(zip(starts, ends)):
             yield i, self._chop(sdata, slice(start, end))
 
-    def _get_sorted_data(self):
+    def _get_sorted_data(self) -> NDFrame:
         return self.data.take(self.sort_idx, axis=self.axis)
 
     def _chop(self, sdata, slice_obj: slice):
@@ -864,7 +871,7 @@ def _chop(self, sdata, slice_obj: slice):
 
 
 class SeriesSplitter(DataSplitter):
-    def _chop(self, sdata, slice_obj: slice):
+    def _chop(self, sdata: Series, slice_obj: slice) -> Series:
         return sdata._get_values(slice_obj)
 
 
@@ -876,14 +883,14 @@ def fast_apply(self, f, names):
         sdata = self._get_sorted_data()
         return libreduction.apply_frame_axis0(sdata, f, names, starts, ends)
 
-    def _chop(self, sdata, slice_obj: slice):
+    def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame:
         if self.axis == 0:
             return sdata.iloc[slice_obj]
         else:
             return sdata._slice(slice_obj, axis=1)
 
 
-def get_splitter(data: NDFrame, *args, **kwargs):
+def get_splitter(data: NDFrame, *args, **kwargs) -> DataSplitter:
     if isinstance(data, Series):
         klass = SeriesSplitter  # type: Type[DataSplitter]
     else:

From efd4a9b856b36484958c63531289ed78b17693f7 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Wed, 6 Nov 2019 20:51:41 -0800
Subject: [PATCH 2/8] annotations, needs debugging

---
 pandas/core/groupby/grouper.py |  2 +-
 pandas/core/groupby/ops.py     | 13 ++++++-------
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index dc6336b17ac1e..dc079190eb6bc 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -672,7 +672,7 @@ def is_in_obj(gpr) -> bool:
     return grouper, exclusions, obj
 
 
-def _is_label_like(val):
+def _is_label_like(val) -> bool:
     return isinstance(val, (str, tuple)) or (val is not None and is_scalar(val))
 
 
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 16a32c30a9857..830c977885b6f 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -41,7 +41,8 @@
 import pandas.core.common as com
 from pandas.core.frame import DataFrame
 from pandas.core.generic import NDFrame
-from pandas.core.groupby import base, grouper
+from pandas.core.groupby import base
+from pandas.core.groupby.grouper import Grouping
 from pandas.core.index import Index, MultiIndex, ensure_index
 from pandas.core.series import Series
 from pandas.core.sorting import (
@@ -79,7 +80,7 @@ class BaseGrouper:
     def __init__(
         self,
         axis: Index,
-        groupings: "Sequence[grouper.Grouping]",
+        groupings: Sequence[Grouping],
         sort: bool = True,
         group_keys: bool = True,
         mutated: bool = False,
@@ -89,7 +90,7 @@ def __init__(
 
         self._filter_empty_groups = self.compressed = len(groupings) != 1
         self.axis = axis
-        self.groupings = groupings  # type: Sequence[grouper.Grouping]
+        self.groupings = list(groupings)  # type: List[Grouping]
         self.sort = sort
         self.group_keys = group_keys
         self.mutated = mutated
@@ -788,9 +789,7 @@ def names(self):
         return [self.binlabels.name]
 
     @property
-    def groupings(self):
-        from pandas.core.groupby.grouper import Grouping
-
+    def groupings(self) -> List[Grouping]:
         return [
             Grouping(lvl, lvl, in_axis=False, level=None, name=name)
             for lvl, name in zip(self.levels, self.names)
@@ -866,7 +865,7 @@ def __iter__(self):
     def _get_sorted_data(self) -> NDFrame:
         return self.data.take(self.sort_idx, axis=self.axis)
 
-    def _chop(self, sdata, slice_obj: slice):
+    def _chop(self, sdata: NDFrame, slice_obj: slice) -> NDFrame:
         raise AbstractMethodError(self)
 
 

From 19332773191033db159f1be659be3d1c8bce37ff Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Wed, 6 Nov 2019 21:02:25 -0800
Subject: [PATCH 3/8] whitespace

---
 pandas/core/groupby/ops.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 830c977885b6f..ebeb55ac1d323 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -230,8 +230,7 @@ def names(self):
 
     def size(self) -> Series:
         """
-        Compute group sizes
-
+        Compute group sizes.
         """
         ids, _, ngroup = self.group_info
         ids = ensure_platform_int(ids)

From 9b6a87ab94ce4527dda1436ce520ce94b9e35ce7 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Wed, 6 Nov 2019 21:04:32 -0800
Subject: [PATCH 4/8] types

---
 pandas/core/groupby/grouper.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index dc079190eb6bc..e120e605d616d 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -372,8 +372,8 @@ def __init__(
 
                 self.grouper = self.grouper.astype("timedelta64[ns]")
 
-    def __repr__(self):
-        return "Grouping({0})".format(self.name)
+    def __repr__(self) -> str:
+        return "Grouping({name})".format(name=self.name)
 
     def __iter__(self):
         return iter(self.indices)
@@ -434,10 +434,10 @@ def _get_grouper(
     key=None,
     axis: int = 0,
     level=None,
-    sort=True,
-    observed=False,
-    mutated=False,
-    validate=True,
+    sort: bool = True,
+    observed: bool = False,
+    mutated: bool = False,
+    validate: bool = True,
 ):
     """
     create and return a BaseGrouper, which is an internal

From d52add414042fc379c88edff90f174840eb850c2 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Wed, 6 Nov 2019 21:20:17 -0800
Subject: [PATCH 5/8] circular import

---
 pandas/core/groupby/grouper.py | 13 +++++++------
 pandas/core/groupby/ops.py     | 11 +++++------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index e120e605d616d..bae725f723715 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -120,7 +120,7 @@ def __init__(self, key=None, level=None, freq=None, axis=0, sort=False):
     def ax(self):
         return self.grouper
 
-    def _get_grouper(self, obj, validate=True):
+    def _get_grouper(self, obj, validate: bool = True):
         """
         Parameters
         ----------
@@ -144,17 +144,18 @@ def _get_grouper(self, obj, validate=True):
         )
         return self.binner, self.grouper, self.obj
 
-    def _set_grouper(self, obj, sort=False):
+    def _set_grouper(self, obj: NDFrame, sort: bool = False):
         """
         given an object and the specifications, setup the internal grouper
         for this particular specification
 
         Parameters
         ----------
-        obj : the subject object
+        obj : Series or DataFrame
         sort : bool, default False
             whether the resulting grouper should be sorted
         """
+        assert obj is not None
 
         if self.key is not None and self.level is not None:
             raise ValueError("The Grouper cannot specify both a key and a level!")
@@ -210,15 +211,15 @@ def _set_grouper(self, obj, sort=False):
     def groups(self):
         return self.grouper.groups
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         attrs_list = (
-            "{}={!r}".format(attr_name, getattr(self, attr_name))
+            "{name}={val!r}".format(name=attr_name, val=getattr(self, attr_name))
             for attr_name in self._attributes
             if getattr(self, attr_name) is not None
         )
         attrs = ", ".join(attrs_list)
         cls_name = self.__class__.__name__
-        return "{}({})".format(cls_name, attrs)
+        return "{cls}({attrs})".format(cls=cls_name, attrs=attrs)
 
 
 class Grouping:
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index ebeb55ac1d323..9c87a6b1b9c41 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -41,8 +41,7 @@
 import pandas.core.common as com
 from pandas.core.frame import DataFrame
 from pandas.core.generic import NDFrame
-from pandas.core.groupby import base
-from pandas.core.groupby.grouper import Grouping
+from pandas.core.groupby import base, grouper
 from pandas.core.index import Index, MultiIndex, ensure_index
 from pandas.core.series import Series
 from pandas.core.sorting import (
@@ -80,7 +79,7 @@ class BaseGrouper:
     def __init__(
         self,
         axis: Index,
-        groupings: Sequence[Grouping],
+        groupings: "Sequence[grouper.Grouping]",
         sort: bool = True,
         group_keys: bool = True,
         mutated: bool = False,
@@ -90,7 +89,7 @@ def __init__(
 
         self._filter_empty_groups = self.compressed = len(groupings) != 1
         self.axis = axis
-        self.groupings = list(groupings)  # type: List[Grouping]
+        self.groupings = list(groupings)  # type: List[grouper.Grouping]
         self.sort = sort
         self.group_keys = group_keys
         self.mutated = mutated
@@ -788,9 +787,9 @@ def names(self):
         return [self.binlabels.name]
 
     @property
-    def groupings(self) -> List[Grouping]:
+    def groupings(self) -> "List[grouper.Grouping]":
         return [
-            Grouping(lvl, lvl, in_axis=False, level=None, name=name)
+            grouper.Grouping(lvl, lvl, in_axis=False, level=None, name=name)
             for lvl, name in zip(self.levels, self.names)
         ]
 

From a7e6ad16eb7ceab3dee6bc366a99a60ae0d8ac53 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Thu, 7 Nov 2019 09:52:28 -0800
Subject: [PATCH 6/8] fix most mypy complaints

---
 pandas/core/groupby/ops.py | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 9c87a6b1b9c41..cbe012012fd29 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -36,6 +36,7 @@
 )
 from pandas.core.dtypes.missing import _maybe_fill, isna
 
+from pandas._typing import FrameOrSeries
 import pandas.core.algorithms as algorithms
 from pandas.core.base import SelectionMixin
 import pandas.core.common as com
@@ -106,7 +107,7 @@ def __iter__(self):
     def nkeys(self) -> int:
         return len(self.groupings)
 
-    def get_iterator(self, data: NDFrame, axis: int = 0):
+    def get_iterator(self, data: FrameOrSeries, axis: int = 0):
         """
         Groupby iterator
 
@@ -120,7 +121,7 @@ def get_iterator(self, data: NDFrame, axis: int = 0):
         for key, (i, group) in zip(keys, splitter):
             yield key, group
 
-    def _get_splitter(self, data: NDFrame, axis: int = 0) -> "DataSplitter":
+    def _get_splitter(self, data: FrameOrSeries, axis: int = 0) -> "DataSplitter":
         comp_ids, _, ngroups = self.group_info
         return get_splitter(data, comp_ids, ngroups, axis=axis)
 
@@ -142,13 +143,13 @@ def _get_group_keys(self):
             # provide "flattened" iterator for multi-group setting
             return get_flattened_iterator(comp_ids, ngroups, self.levels, self.codes)
 
-    def apply(self, f, data: NDFrame, axis: int = 0):
+    def apply(self, f, data: FrameOrSeries, axis: int = 0):
         mutated = self.mutated
         splitter = self._get_splitter(data, axis=axis)
         group_keys = self._get_group_keys()
         result_values = None
 
-        sdata = splitter._get_sorted_data()
+        sdata = splitter._get_sorted_data()  # type: FrameOrSeries
         if sdata.ndim == 2 and np.any(sdata.dtypes.apply(is_extension_array_dtype)):
             # calling splitter.fast_apply will raise TypeError via apply_frame_axis0
             #  if we pass EA instead of ndarray
@@ -157,7 +158,7 @@ def apply(self, f, data: NDFrame, axis: int = 0):
 
         elif (
             com.get_callable_name(f) not in base.plotting_methods
-            and splitter.fast_apply is not None
+            and isinstance(splitter, FrameSplitter)
             and axis == 0
             # with MultiIndex, apply_frame_axis0 would raise InvalidApply
             # TODO: can we make this check prettier?
@@ -720,7 +721,7 @@ def _get_grouper(self):
         """
         return self
 
-    def get_iterator(self, data: NDFrame, axis: int = 0):
+    def get_iterator(self, data: FrameOrSeries, axis: int = 0):
         """
         Groupby iterator
 
@@ -827,9 +828,7 @@ def _is_indexed_like(obj, axes) -> bool:
 
 
 class DataSplitter:
-    fast_apply = None
-
-    def __init__(self, data: NDFrame, labels, ngroups: int, axis: int = 0):
+    def __init__(self, data: FrameOrSeries, labels, ngroups: int, axis: int = 0):
         self.data = data
         self.labels = ensure_int64(labels)
         self.ngroups = ngroups
@@ -860,10 +859,10 @@ def __iter__(self):
         for i, (start, end) in enumerate(zip(starts, ends)):
             yield i, self._chop(sdata, slice(start, end))
 
-    def _get_sorted_data(self) -> NDFrame:
+    def _get_sorted_data(self) -> FrameOrSeries:
         return self.data.take(self.sort_idx, axis=self.axis)
 
-    def _chop(self, sdata: NDFrame, slice_obj: slice) -> NDFrame:
+    def _chop(self, sdata, slice_obj: slice) -> NDFrame:
         raise AbstractMethodError(self)
 
 
@@ -887,7 +886,7 @@ def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame:
             return sdata._slice(slice_obj, axis=1)
 
 
-def get_splitter(data: NDFrame, *args, **kwargs) -> DataSplitter:
+def get_splitter(data: FrameOrSeries, *args, **kwargs) -> DataSplitter:
     if isinstance(data, Series):
         klass = SeriesSplitter  # type: Type[DataSplitter]
     else:

From f03830247e16a583fc0467bc10b02ac8a0fc23ff Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Thu, 7 Nov 2019 14:00:27 -0800
Subject: [PATCH 7/8] fix mypy groupings

---
 pandas/core/groupby/ops.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 55fa3ad6ab435..f8e75f0d3782d 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -90,12 +90,16 @@ def __init__(
 
         self._filter_empty_groups = self.compressed = len(groupings) != 1
         self.axis = axis
-        self.groupings = list(groupings)  # type: List[grouper.Grouping]
+        self._groupings = list(groupings)  # type: List[grouper.Grouping]
         self.sort = sort
         self.group_keys = group_keys
         self.mutated = mutated
         self.indexer = indexer
 
+    @property
+    def groupings(self) -> List["grouper.Grouping"]:
+        return self._groupings
+
     @property
     def shape(self):
         return tuple(ping.ngroups for ping in self.groupings)

From 0b28143c5f7611776ac6d1677ae0aa8600078f09 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Fri, 8 Nov 2019 09:56:30 -0800
Subject: [PATCH 8/8] merge cleanup

---
 pandas/core/groupby/grouper.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index 47d9510e8d59a..e6e3ee62459ca 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -143,7 +143,7 @@ def _get_grouper(self, obj, validate: bool = True):
         )
         return self.binner, self.grouper, self.obj
 
-    def _set_grouper(self, obj: NDFrame, sort: bool = False):
+    def _set_grouper(self, obj: FrameOrSeries, sort: bool = False):
         """
         given an object and the specifications, setup the internal grouper
         for this particular specification
@@ -435,6 +435,9 @@ def get_grouper(
     axis: int = 0,
     level=None,
     sort: bool = True,
+    observed: bool = False,
+    mutated: bool = False,
+    validate: bool = True,
 ) -> Tuple[BaseGrouper, List[Hashable], FrameOrSeries]:
     """
     Create and return a BaseGrouper, which is an internal