Skip to content

Commit 7268e4d

Browse files
jbrockmendel authored and TLouf committed
REF: do less in Grouping.__init__ (pandas-dev#41375)
1 parent 84597e3 commit 7268e4d

File tree

2 files changed

+55
-38
lines changed

2 files changed

+55
-38
lines changed

pandas/core/groupby/generic.py

+1-5
Original file line numberDiff line numberDiff line change
@@ -777,11 +777,7 @@ def apply_series_value_counts():
777777
# multi-index components
778778
codes = self.grouper.reconstructed_codes
779779
codes = [rep(level_codes) for level_codes in codes] + [llab(lab, inc)]
780-
# error: List item 0 has incompatible type "Union[ndarray, Any]";
781-
# expected "Index"
782-
levels = [ping.group_index for ping in self.grouper.groupings] + [
783-
lev # type: ignore[list-item]
784-
]
780+
levels = [ping.group_index for ping in self.grouper.groupings] + [lev]
785781
names = self.grouper.names + [self.obj.name]
786782

787783
if dropna:

pandas/core/groupby/grouper.py

+54-33
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,9 @@ class Grouping:
438438
* groups : dict of {group -> label_list}
439439
"""
440440

441+
_codes: np.ndarray | None = None
442+
_group_index: Index | None = None
443+
441444
def __init__(
442445
self,
443446
index: Index,
@@ -461,27 +464,25 @@ def __init__(
461464
self.in_axis = in_axis
462465
self.dropna = dropna
463466

467+
self._passed_categorical = False
468+
464469
# right place for this?
465470
if isinstance(grouper, (Series, Index)) and name is None:
466471
self.name = grouper.name
467472

468473
# we have a single grouper which may be a myriad of things,
469474
# some of which are dependent on the passing in level
470475

471-
if level is not None:
472-
if not isinstance(level, int):
473-
if level not in index.names:
474-
raise AssertionError(f"Level {level} not in index")
475-
level = index.names.index(level)
476-
476+
ilevel = self._ilevel
477+
if ilevel is not None:
477478
if self.name is None:
478-
self.name = index.names[level]
479+
self.name = index.names[ilevel]
479480

480481
(
481-
self.grouper,
482+
self.grouper, # Index
482483
self._codes,
483484
self._group_index,
484-
) = index._get_grouper_for_level(self.grouper, level)
485+
) = index._get_grouper_for_level(self.grouper, ilevel)
485486

486487
# a passed Grouper like, directly get the grouper in the same way
487488
# as single grouper groupby, use the group_info to get codes
@@ -502,32 +503,13 @@ def __init__(
502503
self.grouper = grouper._get_grouper()
503504

504505
else:
505-
506506
# a passed Categorical
507507
if is_categorical_dtype(self.grouper):
508+
self._passed_categorical = True
508509

509510
self.grouper, self.all_grouper = recode_for_groupby(
510511
self.grouper, self.sort, observed
511512
)
512-
categories = self.grouper.categories
513-
514-
# we make a CategoricalIndex out of the cat grouper
515-
# preserving the categories / ordered attributes
516-
self._codes = self.grouper.codes
517-
if observed:
518-
codes = algorithms.unique1d(self.grouper.codes)
519-
codes = codes[codes != -1]
520-
if sort or self.grouper.ordered:
521-
codes = np.sort(codes)
522-
else:
523-
codes = np.arange(len(categories))
524-
525-
self._group_index = CategoricalIndex(
526-
Categorical.from_codes(
527-
codes=codes, categories=categories, ordered=self.grouper.ordered
528-
),
529-
name=self.name,
530-
)
531513

532514
# we are done
533515
elif isinstance(self.grouper, Grouping):
@@ -564,8 +546,20 @@ def __repr__(self) -> str:
564546
def __iter__(self):
565547
return iter(self.indices)
566548

567-
_codes: np.ndarray | None = None
568-
_group_index: Index | None = None
549+
@cache_readonly
550+
def _ilevel(self) -> int | None:
551+
"""
552+
If necessary, convert index level name to index level position.
553+
"""
554+
level = self.level
555+
if level is None:
556+
return None
557+
if not isinstance(level, int):
558+
index = self.index
559+
if level not in index.names:
560+
raise AssertionError(f"Level {level} not in index")
561+
return index.names.index(level)
562+
return level
569563

570564
@property
571565
def ngroups(self) -> int:
@@ -582,6 +576,12 @@ def indices(self):
582576

583577
@property
584578
def codes(self) -> np.ndarray:
579+
if self._passed_categorical:
580+
# we make a CategoricalIndex out of the cat grouper
581+
# preserving the categories / ordered attributes
582+
cat = self.grouper
583+
return cat.codes
584+
585585
if self._codes is None:
586586
self._make_codes()
587587
# error: Incompatible return value type (got "Optional[ndarray]",
@@ -592,12 +592,33 @@ def codes(self) -> np.ndarray:
592592
def result_index(self) -> Index:
593593
if self.all_grouper is not None:
594594
group_idx = self.group_index
595-
assert isinstance(group_idx, CategoricalIndex) # set in __init__
595+
assert isinstance(group_idx, CategoricalIndex)
596596
return recode_from_groupby(self.all_grouper, self.sort, group_idx)
597597
return self.group_index
598598

599-
@property
599+
@cache_readonly
600600
def group_index(self) -> Index:
601+
if self._passed_categorical:
602+
# we make a CategoricalIndex out of the cat grouper
603+
# preserving the categories / ordered attributes
604+
cat = self.grouper
605+
categories = cat.categories
606+
607+
if self.observed:
608+
codes = algorithms.unique1d(cat.codes)
609+
codes = codes[codes != -1]
610+
if self.sort or cat.ordered:
611+
codes = np.sort(codes)
612+
else:
613+
codes = np.arange(len(categories))
614+
615+
return CategoricalIndex(
616+
Categorical.from_codes(
617+
codes=codes, categories=categories, ordered=cat.ordered
618+
),
619+
name=self.name,
620+
)
621+
601622
if self._group_index is None:
602623
self._make_codes()
603624
assert self._group_index is not None

0 commit comments

Comments
 (0)