TYP: annotations in core.groupby (#35939)

jbrockmendel · web-flow · commit 132e19173265 · 2020-08-31T11:16:15.000+01:00
diff --git a/pandas/core/groupby/categorical.py b/pandas/core/groupby/categorical.py
@@ -1,3 +1,5 @@
+from typing import Optional, Tuple
+
 import numpy as np
 
 from pandas.core.algorithms import unique1d
@@ -6,9 +8,12 @@
     CategoricalDtype,
     recode_for_categories,
 )
+from pandas.core.indexes.api import CategoricalIndex
 
 
-def recode_for_groupby(c: Categorical, sort: bool, observed: bool):
+def recode_for_groupby(
+    c: Categorical, sort: bool, observed: bool
+) -> Tuple[Categorical, Optional[Categorical]]:
     """
     Code the categories to ensure we can groupby for categoricals.
 
@@ -73,7 +78,9 @@ def recode_for_groupby(c: Categorical, sort: bool, observed: bool):
     return c.reorder_categories(cat.categories), None
 
 
-def recode_from_groupby(c: Categorical, sort: bool, ci):
+def recode_from_groupby(
+    c: Categorical, sort: bool, ci: CategoricalIndex
+) -> CategoricalIndex:
     """
     Reverse the codes_to_groupby to account for sort / observed.
 
@@ -91,7 +98,8 @@ def recode_from_groupby(c: Categorical, sort: bool, ci):
     """
     # we re-order to the original category orderings
     if sort:
-        return ci.set_categories(c.categories)
+        return ci.set_categories(c.categories)  # type: ignore [attr-defined]
 
     # we are not sorting, so add unobserved to the end
-    return ci.add_categories(c.categories[~c.categories.isin(ci.categories)])
+    new_cats = c.categories[~c.categories.isin(ci.categories)]
+    return ci.add_categories(new_cats)  # type: ignore [attr-defined]
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -23,6 +23,7 @@
     Type,
     TypeVar,
     Union,
+    cast,
 )
 import warnings
 
@@ -83,7 +84,7 @@
 from pandas.plotting import boxplot_frame_groupby
 
 if TYPE_CHECKING:
-    from pandas.core.internals import Block
+    from pandas.core.internals import Block  # noqa:F401
 
 
 NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"])
@@ -1591,7 +1592,7 @@ def _gotitem(self, key, ndim: int, subset=None):
         Parameters
         ----------
         key : string / list of selections
-        ndim : 1,2
+        ndim : {1, 2}
             requested ndim of result
         subset : object, default None
             subset to act on
@@ -1617,7 +1618,7 @@ def _gotitem(self, key, ndim: int, subset=None):
 
         raise AssertionError("invalid ndim for _gotitem")
 
-    def _wrap_frame_output(self, result, obj) -> DataFrame:
+    def _wrap_frame_output(self, result, obj: DataFrame) -> DataFrame:
         result_index = self.grouper.levels[0]
 
         if self.axis == 0:
@@ -1634,20 +1635,14 @@ def _get_data_to_aggregate(self) -> BlockManager:
         else:
             return obj._mgr
 
-    def _insert_inaxis_grouper_inplace(self, result):
+    def _insert_inaxis_grouper_inplace(self, result: DataFrame) -> None:
         # zip in reverse so we can always insert at loc 0
-        izip = zip(
-            *map(
-                reversed,
-                (
-                    self.grouper.names,
-                    self.grouper.get_group_levels(),
-                    [grp.in_axis for grp in self.grouper.groupings],
-                ),
-            )
-        )
         columns = result.columns
-        for name, lev, in_axis in izip:
+        for name, lev, in_axis in zip(
+            reversed(self.grouper.names),
+            reversed(self.grouper.get_group_levels()),
+            reversed([grp.in_axis for grp in self.grouper.groupings]),
+        ):
             # GH #28549
             # When using .apply(-), name will be in columns already
             if in_axis and name not in columns:
@@ -1712,7 +1707,7 @@ def _wrap_transformed_output(
 
         return result
 
-    def _wrap_agged_blocks(self, blocks: "Sequence[Block]", items: Index) -> DataFrame:
+    def _wrap_agged_blocks(self, blocks: Sequence["Block"], items: Index) -> DataFrame:
         if not self.as_index:
             index = np.arange(blocks[0].values.shape[-1])
             mgr = BlockManager(blocks, axes=[items, index])
@@ -1739,7 +1734,7 @@ def _iterate_column_groupbys(self):
                 exclusions=self.exclusions,
             )
 
-    def _apply_to_column_groupbys(self, func):
+    def _apply_to_column_groupbys(self, func) -> DataFrame:
         from pandas.core.reshape.concat import concat
 
         return concat(
@@ -1748,7 +1743,7 @@ def _apply_to_column_groupbys(self, func):
             axis=1,
         )
 
-    def count(self):
+    def count(self) -> DataFrame:
         """
         Compute count of group, excluding missing values.
 
@@ -1778,7 +1773,7 @@ def count(self):
 
         return self._reindex_output(result, fill_value=0)
 
-    def nunique(self, dropna: bool = True):
+    def nunique(self, dropna: bool = True) -> DataFrame:
         """
         Return DataFrame with counts of unique elements in each position.
 
@@ -1844,6 +1839,7 @@ def nunique(self, dropna: bool = True):
             ],
             axis=1,
         )
+        results = cast(DataFrame, results)
 
         if axis_number == 1:
             results = results.T
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -459,7 +459,7 @@ def f(self):
 
 
 @contextmanager
-def _group_selection_context(groupby):
+def _group_selection_context(groupby: "_GroupBy"):
     """
     Set / reset the _group_selection_context.
     """
@@ -489,7 +489,7 @@ def __init__(
         keys: Optional[_KeysArgType] = None,
         axis: int = 0,
         level=None,
-        grouper: "Optional[ops.BaseGrouper]" = None,
+        grouper: Optional["ops.BaseGrouper"] = None,
         exclusions=None,
         selection=None,
         as_index: bool = True,
@@ -734,7 +734,7 @@ def pipe(self, func, *args, **kwargs):
 
     plot = property(GroupByPlot)
 
-    def _make_wrapper(self, name):
+    def _make_wrapper(self, name: str) -> Callable:
         assert name in self._apply_allowlist
 
         with _group_selection_context(self):
diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
@@ -568,7 +568,9 @@ def codes(self) -> np.ndarray:
     @cache_readonly
     def result_index(self) -> Index:
         if self.all_grouper is not None:
-            return recode_from_groupby(self.all_grouper, self.sort, self.group_index)
+            group_idx = self.group_index
+            assert isinstance(group_idx, CategoricalIndex)  # set in __init__
+            return recode_from_groupby(self.all_grouper, self.sort, group_idx)
         return self.group_index
 
     @property
@@ -607,7 +609,7 @@ def get_grouper(
     mutated: bool = False,
     validate: bool = True,
     dropna: bool = True,
-) -> "Tuple[ops.BaseGrouper, List[Hashable], FrameOrSeries]":
+) -> Tuple["ops.BaseGrouper", List[Hashable], FrameOrSeries]:
     """
     Create and return a BaseGrouper, which is an internal
     mapping of how to create the grouper indexers.
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
@@ -82,7 +82,7 @@ class BaseGrouper:
     def __init__(
         self,
         axis: Index,
-        groupings: "Sequence[grouper.Grouping]",
+        groupings: Sequence["grouper.Grouping"],
         sort: bool = True,
         group_keys: bool = True,
         mutated: bool = False,