diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 31d6e2206f569..e73be29d5b104 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -379,9 +379,9 @@ def __init__( self.mutated = kwargs.pop("mutated", False) if grouper is None: - from pandas.core.groupby.grouper import _get_grouper + from pandas.core.groupby.grouper import get_grouper - grouper, exclusions, obj = _get_grouper( + grouper, exclusions, obj = get_grouper( obj, keys, axis=axis, @@ -1802,9 +1802,9 @@ def nth(self, n: Union[int, List[int]], dropna: Optional[str] = None) -> DataFra # create a grouper with the original parameters, but on dropped # object - from pandas.core.groupby.grouper import _get_grouper + from pandas.core.groupby.grouper import get_grouper - grouper, _, _ = _get_grouper( + grouper, _, _ = get_grouper( dropped, key=self.keys, axis=self.axis, diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index ff3b4b1096ecb..370abe75e1327 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -3,7 +3,7 @@ split-apply-combine paradigm. """ -from typing import Optional, Tuple +from typing import Hashable, List, Optional, Tuple import warnings import numpy as np @@ -26,7 +26,6 @@ from pandas.core.arrays import Categorical, ExtensionArray import pandas.core.common as com from pandas.core.frame import DataFrame -from pandas.core.generic import NDFrame from pandas.core.groupby.categorical import recode_for_groupby, recode_from_groupby from pandas.core.groupby.ops import BaseGrouper from pandas.core.index import CategoricalIndex, Index, MultiIndex @@ -134,7 +133,7 @@ def _get_grouper(self, obj, validate=True): """ self._set_grouper(obj) - self.grouper, exclusions, self.obj = _get_grouper( + self.grouper, exclusions, self.obj = get_grouper( self.obj, [self.key], axis=self.axis, @@ -429,8 +428,8 @@ def groups(self) -> dict: return self.index.groupby(Categorical.from_codes(self.codes, self.group_index)) -def _get_grouper( - obj: NDFrame, +def get_grouper( + obj: FrameOrSeries, key=None, axis: int = 0, level=None, @@ -438,9 +437,9 @@ def _get_grouper( observed=False, mutated=False, validate=True, -): +) -> Tuple[BaseGrouper, List[Hashable], FrameOrSeries]: """ - create and return a BaseGrouper, which is an internal + Create and return a BaseGrouper, which is an internal mapping of how to create the grouper indexers. This may be composed of multiple Grouping objects, indicating multiple groupers @@ -456,9 +455,9 @@ def _get_grouper( a BaseGrouper. If observed & we have a categorical grouper, only show the observed - values + values. - If validate, then check for key/level overlaps + If validate, then check for key/level overlaps. """ group_axis = obj._get_axis(axis) @@ -517,7 +516,7 @@ def _get_grouper( if key.key is None: return grouper, [], obj else: - return grouper, {key.key}, obj + return grouper, [key.key], obj # already have a BaseGrouper, just return it elif isinstance(key, BaseGrouper): @@ -530,10 +529,8 @@ def _get_grouper( # unhashable elements of `key`. Any unhashable elements implies that # they wanted a list of keys. # https://github.com/pandas-dev/pandas/issues/18314 - is_tuple = isinstance(key, tuple) - all_hashable = is_tuple and is_hashable(key) - - if is_tuple: + if isinstance(key, tuple): + all_hashable = is_hashable(key) if ( all_hashable and key not in obj and set(key).issubset(obj) ) or not all_hashable: @@ -573,7 +570,8 @@ def _get_grouper( all_in_columns_index = all( g in obj.columns or g in obj.index.names for g in keys ) - elif isinstance(obj, Series): + else: + assert isinstance(obj, Series) all_in_columns_index = all(g in obj.index.names for g in keys) if not all_in_columns_index: @@ -586,8 +584,8 @@ def _get_grouper( else: levels = [level] * len(keys) - groupings = [] - exclusions = [] + groupings = [] # type: List[Grouping] + exclusions = [] # type: List[Hashable] # if the actual grouper should be obj[key] def is_in_axis(key) -> bool: