TYP: exclusions in BaseGroupBy (#36559)

rhshadrach · web-flow · commit 9e1bd7c31d01 · 2020-09-22T18:16:04.000-04:00
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -24,6 +24,7 @@ class providing the base-class of operations.
     Mapping,
     Optional,
     Sequence,
+    Set,
     Tuple,
     Type,
     TypeVar,
@@ -36,7 +37,7 @@ class providing the base-class of operations.
 
 from pandas._libs import Timestamp, lib
 import pandas._libs.groupby as libgroupby
-from pandas._typing import F, FrameOrSeries, FrameOrSeriesUnion, Scalar
+from pandas._typing import F, FrameOrSeries, FrameOrSeriesUnion, Label, Scalar
 from pandas.compat.numpy import function as nv
 from pandas.errors import AbstractMethodError
 from pandas.util._decorators import Appender, Substitution, cache_readonly, doc
@@ -488,7 +489,7 @@ def __init__(
         axis: int = 0,
         level=None,
         grouper: Optional["ops.BaseGrouper"] = None,
-        exclusions=None,
+        exclusions: Optional[Set[Label]] = None,
         selection=None,
         as_index: bool = True,
         sort: bool = True,
@@ -537,7 +538,7 @@ def __init__(
         self.obj = obj
         self.axis = obj._get_axis_number(axis)
         self.grouper = grouper
-        self.exclusions = set(exclusions) if exclusions else set()
+        self.exclusions = exclusions or set()
 
     def __len__(self) -> int:
         return len(self.groups)
diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
@@ -2,12 +2,12 @@
 Provide user facing operators for doing the split part of the
 split-apply-combine paradigm.
 """
-from typing import Dict, Hashable, List, Optional, Tuple
+from typing import Dict, Hashable, List, Optional, Set, Tuple
 import warnings
 
 import numpy as np
 
-from pandas._typing import FrameOrSeries
+from pandas._typing import FrameOrSeries, Label
 from pandas.errors import InvalidIndexError
 from pandas.util._decorators import cache_readonly
 
@@ -614,7 +614,7 @@ def get_grouper(
     mutated: bool = False,
     validate: bool = True,
     dropna: bool = True,
-) -> Tuple["ops.BaseGrouper", List[Hashable], FrameOrSeries]:
+) -> Tuple["ops.BaseGrouper", Set[Label], FrameOrSeries]:
     """
     Create and return a BaseGrouper, which is an internal
     mapping of how to create the grouper indexers.
@@ -690,13 +690,13 @@ def get_grouper(
     if isinstance(key, Grouper):
         binner, grouper, obj = key._get_grouper(obj, validate=False)
         if key.key is None:
-            return grouper, [], obj
+            return grouper, set(), obj
         else:
-            return grouper, [key.key], obj
+            return grouper, {key.key}, obj
 
     # already have a BaseGrouper, just return it
     elif isinstance(key, ops.BaseGrouper):
-        return key, [], obj
+        return key, set(), obj
 
     if not isinstance(key, list):
         keys = [key]
@@ -739,7 +739,7 @@ def get_grouper(
         levels = [level] * len(keys)
 
     groupings: List[Grouping] = []
-    exclusions: List[Hashable] = []
+    exclusions: Set[Label] = set()
 
     # if the actual grouper should be obj[key]
     def is_in_axis(key) -> bool:
@@ -769,21 +769,21 @@ def is_in_obj(gpr) -> bool:
 
         if is_in_obj(gpr):  # df.groupby(df['name'])
             in_axis, name = True, gpr.name
-            exclusions.append(name)
+            exclusions.add(name)
 
         elif is_in_axis(gpr):  # df.groupby('name')
             if gpr in obj:
                 if validate:
                     obj._check_label_or_level_ambiguity(gpr, axis=axis)
                 in_axis, name, gpr = True, gpr, obj[gpr]
-                exclusions.append(name)
+                exclusions.add(name)
             elif obj._is_level_reference(gpr, axis=axis):
                 in_axis, name, level, gpr = False, None, gpr, None
             else:
                 raise KeyError(gpr)
         elif isinstance(gpr, Grouper) and gpr.key is not None:
             # Add key to exclusions
-            exclusions.append(gpr.key)
+            exclusions.add(gpr.key)
             in_axis, name = False, None
         else:
             in_axis, name = False, None