8
8
TYPE_CHECKING ,
9
9
Any ,
10
10
Hashable ,
11
+ cast ,
11
12
final ,
12
13
)
13
14
import warnings
14
15
15
16
import numpy as np
16
17
17
18
from pandas ._typing import (
19
+ AnyArrayLike ,
18
20
ArrayLike ,
19
21
NDFrameT ,
20
22
npt ,
38
40
import pandas .core .common as com
39
41
from pandas .core .frame import DataFrame
40
42
from pandas .core .groupby import ops
41
- from pandas .core .groupby .categorical import (
42
- recode_for_groupby ,
43
- recode_from_groupby ,
44
- )
43
+ from pandas .core .groupby .categorical import CategoricalGrouper
45
44
from pandas .core .indexes .api import (
46
- CategoricalIndex ,
47
45
Index ,
48
46
MultiIndex ,
49
47
)
@@ -461,8 +459,7 @@ class Grouping:
461
459
462
460
_codes : npt .NDArray [np .signedinteger ] | None = None
463
461
_group_index : Index | None = None
464
- _passed_categorical : bool
465
- _all_grouper : Categorical | None
462
+ _cat_grouper : CategoricalGrouper | None = None
466
463
_index : Index
467
464
468
465
def __init__ (
@@ -479,16 +476,12 @@ def __init__(
479
476
self .level = level
480
477
self ._orig_grouper = grouper
481
478
self .grouping_vector = _convert_grouper (index , grouper )
482
- self ._all_grouper = None
483
479
self ._index = index
484
480
self ._sort = sort
485
481
self .obj = obj
486
- self ._observed = observed
487
482
self .in_axis = in_axis
488
483
self ._dropna = dropna
489
484
490
- self ._passed_categorical = False
491
-
492
485
# we have a single grouper which may be a myriad of things,
493
486
# some of which are dependent on the passing in level
494
487
@@ -527,13 +520,10 @@ def __init__(
527
520
self .grouping_vector = Index (ng , name = newgrouper .result_index .name )
528
521
529
522
elif is_categorical_dtype (self .grouping_vector ):
530
- # a passed Categorical
531
- self ._passed_categorical = True
532
-
533
- self .grouping_vector , self ._all_grouper = recode_for_groupby (
523
+ self ._cat_grouper = CategoricalGrouper .make (
534
524
self .grouping_vector , sort , observed
535
525
)
536
-
526
+ self . grouping_vector = self . _cat_grouper . new_grouping_vector
537
527
elif not isinstance (
538
528
self .grouping_vector , (Series , Index , ExtensionArray , np .ndarray )
539
529
):
@@ -631,20 +621,23 @@ def group_arraylike(self) -> ArrayLike:
631
621
# _group_index is set in __init__ for MultiIndex cases
632
622
return self ._group_index ._values
633
623
634
- elif self ._all_grouper is not None :
624
+ elif (
625
+ self ._cat_grouper is not None
626
+ and self ._cat_grouper .original_grouping_vector is not None
627
+ ):
635
628
# retain dtype for categories, including unobserved ones
636
629
return self .result_index ._values
637
630
638
- return self ._codes_and_uniques [1 ]
631
+ return cast ( ArrayLike , self ._codes_and_uniques [1 ])
639
632
640
633
@cache_readonly
641
634
def result_index (self ) -> Index :
642
- # result_index retains dtype for categories, including unobserved ones,
643
- # which group_index does not
644
- if self . _all_grouper is not None :
645
- group_idx = self . group_index
646
- assert isinstance ( group_idx , CategoricalIndex )
647
- return recode_from_groupby ( self ._all_grouper , self ._sort , group_idx )
635
+ """
636
+ result_index retains dtype for categories, including unobserved ones,
637
+ which group_index does not
638
+ """
639
+ if self . _cat_grouper is not None :
640
+ return self ._cat_grouper . result_index ( self .group_index )
648
641
return self .group_index
649
642
650
643
@cache_readonly
@@ -657,26 +650,10 @@ def group_index(self) -> Index:
657
650
return Index ._with_infer (uniques , name = self .name )
658
651
659
652
@cache_readonly
660
- def _codes_and_uniques (self ) -> tuple [npt .NDArray [np .signedinteger ], ArrayLike ]:
661
- if self ._passed_categorical :
662
- # we make a CategoricalIndex out of the cat grouper
663
- # preserving the categories / ordered attributes
664
- cat = self .grouping_vector
665
- categories = cat .categories
666
-
667
- if self ._observed :
668
- ucodes = algorithms .unique1d (cat .codes )
669
- ucodes = ucodes [ucodes != - 1 ]
670
- if self ._sort or cat .ordered :
671
- ucodes = np .sort (ucodes )
672
- else :
673
- ucodes = np .arange (len (categories ))
674
-
675
- uniques = Categorical .from_codes (
676
- codes = ucodes , categories = categories , ordered = cat .ordered
677
- )
678
- return cat .codes , uniques
679
-
653
+ def _codes_and_uniques (self ) -> tuple [npt .NDArray [np .signedinteger ], AnyArrayLike ]:
654
+ uniques : AnyArrayLike
655
+ if self ._cat_grouper is not None :
656
+ return self ._cat_grouper .codes_and_uniques (self .grouping_vector )
680
657
elif isinstance (self .grouping_vector , ops .BaseGrouper ):
681
658
# we have a list of groupers
682
659
codes = self .grouping_vector .codes_info
0 commit comments