|
2 | 2 | Provide user facing operators for doing the split part of the
|
3 | 3 | split-apply-combine paradigm.
|
4 | 4 | """
|
5 |
| -from typing import Dict, Hashable, List, Optional, Tuple |
| 5 | +from typing import Dict, Hashable, List, Optional, Set, Tuple |
6 | 6 | import warnings
|
7 | 7 |
|
8 | 8 | import numpy as np
|
9 | 9 |
|
10 |
| -from pandas._typing import FrameOrSeries |
| 10 | +from pandas._typing import FrameOrSeries, Label |
11 | 11 | from pandas.errors import InvalidIndexError
|
12 | 12 | from pandas.util._decorators import cache_readonly
|
13 | 13 |
|
@@ -614,7 +614,7 @@ def get_grouper(
|
614 | 614 | mutated: bool = False,
|
615 | 615 | validate: bool = True,
|
616 | 616 | dropna: bool = True,
|
617 |
| -) -> Tuple["ops.BaseGrouper", List[Hashable], FrameOrSeries]: |
| 617 | +) -> Tuple["ops.BaseGrouper", Set[Label], FrameOrSeries]: |
618 | 618 | """
|
619 | 619 | Create and return a BaseGrouper, which is an internal
|
620 | 620 | mapping of how to create the grouper indexers.
|
@@ -690,13 +690,13 @@ def get_grouper(
|
690 | 690 | if isinstance(key, Grouper):
|
691 | 691 | binner, grouper, obj = key._get_grouper(obj, validate=False)
|
692 | 692 | if key.key is None:
|
693 |
| - return grouper, [], obj |
| 693 | + return grouper, set(), obj |
694 | 694 | else:
|
695 |
| - return grouper, [key.key], obj |
| 695 | + return grouper, {key.key}, obj |
696 | 696 |
|
697 | 697 | # already have a BaseGrouper, just return it
|
698 | 698 | elif isinstance(key, ops.BaseGrouper):
|
699 |
| - return key, [], obj |
| 699 | + return key, set(), obj |
700 | 700 |
|
701 | 701 | if not isinstance(key, list):
|
702 | 702 | keys = [key]
|
@@ -739,7 +739,7 @@ def get_grouper(
|
739 | 739 | levels = [level] * len(keys)
|
740 | 740 |
|
741 | 741 | groupings: List[Grouping] = []
|
742 |
| - exclusions: List[Hashable] = [] |
| 742 | + exclusions: Set[Label] = set() |
743 | 743 |
|
744 | 744 | # if the actual grouper should be obj[key]
|
745 | 745 | def is_in_axis(key) -> bool:
|
@@ -769,21 +769,21 @@ def is_in_obj(gpr) -> bool:
|
769 | 769 |
|
770 | 770 | if is_in_obj(gpr): # df.groupby(df['name'])
|
771 | 771 | in_axis, name = True, gpr.name
|
772 |
| - exclusions.append(name) |
| 772 | + exclusions.add(name) |
773 | 773 |
|
774 | 774 | elif is_in_axis(gpr): # df.groupby('name')
|
775 | 775 | if gpr in obj:
|
776 | 776 | if validate:
|
777 | 777 | obj._check_label_or_level_ambiguity(gpr, axis=axis)
|
778 | 778 | in_axis, name, gpr = True, gpr, obj[gpr]
|
779 |
| - exclusions.append(name) |
| 779 | + exclusions.add(name) |
780 | 780 | elif obj._is_level_reference(gpr, axis=axis):
|
781 | 781 | in_axis, name, level, gpr = False, None, gpr, None
|
782 | 782 | else:
|
783 | 783 | raise KeyError(gpr)
|
784 | 784 | elif isinstance(gpr, Grouper) and gpr.key is not None:
|
785 | 785 | # Add key to exclusions
|
786 |
| - exclusions.append(gpr.key) |
| 786 | + exclusions.add(gpr.key) |
787 | 787 | in_axis, name = False, None
|
788 | 788 | else:
|
789 | 789 | in_axis, name = False, None
|
|
0 commit comments