38
38
import pandas .core .common as com
39
39
from pandas .core .frame import DataFrame
40
40
from pandas .core .groupby import ops
41
- from pandas .core .groupby .categorical import (
42
- recode_for_groupby ,
43
- recode_from_groupby ,
44
- )
41
+ from pandas .core .groupby .categorical import CategoricalGrouper
45
42
from pandas .core .indexes .api import (
46
- CategoricalIndex ,
47
43
Index ,
48
44
MultiIndex ,
49
45
)
@@ -461,8 +457,7 @@ class Grouping:
461
457
462
458
_codes : npt .NDArray [np .signedinteger ] | None = None
463
459
_group_index : Index | None = None
464
- _passed_categorical : bool
465
- _all_grouper : Categorical | None
460
+ _cat_info : CategoricalGrouper | None = None
466
461
_index : Index
467
462
468
463
def __init__ (
@@ -479,16 +474,12 @@ def __init__(
479
474
self .level = level
480
475
self ._orig_grouper = grouper
481
476
self .grouping_vector = _convert_grouper (index , grouper )
482
- self ._all_grouper = None
483
477
self ._index = index
484
478
self ._sort = sort
485
479
self .obj = obj
486
- self ._observed = observed
487
480
self .in_axis = in_axis
488
481
self ._dropna = dropna
489
482
490
- self ._passed_categorical = False
491
-
492
483
# we have a single grouper which may be a myriad of things,
493
484
# some of which are dependent on the passing in level
494
485
@@ -527,13 +518,10 @@ def __init__(
527
518
self .grouping_vector = Index (ng , name = newgrouper .result_index .name )
528
519
529
520
elif is_categorical_dtype (self .grouping_vector ):
530
- # a passed Categorical
531
- self ._passed_categorical = True
532
-
533
- self .grouping_vector , self ._all_grouper = recode_for_groupby (
534
- self .grouping_vector , sort , observed
521
+ self ._cat_info = CategoricalGrouper .make (
522
+ self .grouping_vector , sort , observed , dropna = dropna
535
523
)
536
-
524
+ self . grouping_vector = self . _cat_info . new_grouping_vector
537
525
elif not isinstance (
538
526
self .grouping_vector , (Series , Index , ExtensionArray , np .ndarray )
539
527
):
@@ -631,20 +619,23 @@ def group_arraylike(self) -> ArrayLike:
631
619
# _group_index is set in __init__ for MultiIndex cases
632
620
return self ._group_index ._values
633
621
634
- elif self ._all_grouper is not None :
622
+ elif (
623
+ self ._cat_info is not None
624
+ and self ._cat_info .original_grouping_vector is not None
625
+ ):
635
626
# retain dtype for categories, including unobserved ones
636
627
return self .result_index ._values
637
628
638
629
return self ._codes_and_uniques [1 ]
639
630
640
631
@cache_readonly
641
632
def result_index (self ) -> Index :
642
- # result_index retains dtype for categories, including unobserved ones,
643
- # which group_index does not
644
- if self . _all_grouper is not None :
645
- group_idx = self . group_index
646
- assert isinstance ( group_idx , CategoricalIndex )
647
- return recode_from_groupby ( self ._all_grouper , self ._sort , group_idx )
633
+ """
634
+ result_index retains dtype for categories, including unobserved ones,
635
+ which group_index does not
636
+ """
637
+ if self . _cat_info is not None :
638
+ return self ._cat_info . result_index ( self .group_index )
648
639
return self .group_index
649
640
650
641
@cache_readonly
@@ -658,25 +649,8 @@ def group_index(self) -> Index:
658
649
659
650
@cache_readonly
660
651
def _codes_and_uniques (self ) -> tuple [npt .NDArray [np .signedinteger ], ArrayLike ]:
661
- if self ._passed_categorical :
662
- # we make a CategoricalIndex out of the cat grouper
663
- # preserving the categories / ordered attributes
664
- cat = self .grouping_vector
665
- categories = cat .categories
666
-
667
- if self ._observed :
668
- ucodes = algorithms .unique1d (cat .codes )
669
- ucodes = ucodes [ucodes != - 1 ]
670
- if self ._sort or cat .ordered :
671
- ucodes = np .sort (ucodes )
672
- else :
673
- ucodes = np .arange (len (categories ))
674
-
675
- uniques = Categorical .from_codes (
676
- codes = ucodes , categories = categories , ordered = cat .ordered
677
- )
678
- return cat .codes , uniques
679
-
652
+ if self ._cat_info is not None :
653
+ return self ._cat_info .codes_and_uniques (self .grouping_vector )
680
654
elif isinstance (self .grouping_vector , ops .BaseGrouper ):
681
655
# we have a list of groupers
682
656
codes = self .grouping_vector .codes_info
0 commit comments