Skip to content

Commit e9478e5

Browse files
authored
REF: privatize Grouping attrs (#41534)
1 parent e5f1f9c commit e9478e5

File tree

2 files changed

+28
-21
lines changed

2 files changed

+28
-21
lines changed

pandas/core/groupby/grouper.py

+26 -19
Original file line number | Diff line number | Diff line change
@@ -441,6 +441,9 @@ class Grouping:
441441

442442
_codes: np.ndarray | None = None
443443
_group_index: Index | None = None
444+
_passed_categorical: bool
445+
_all_grouper: Categorical | None
446+
_index: Index
444447

445448
def __init__(
446449
self,
@@ -456,13 +459,13 @@ def __init__(
456459
self.level = level
457460
self._orig_grouper = grouper
458461
self.grouping_vector = _convert_grouper(index, grouper)
459-
self.all_grouper = None
460-
self.index = index
461-
self.sort = sort
462+
self._all_grouper = None
463+
self._index = index
464+
self._sort = sort
462465
self.obj = obj
463-
self.observed = observed
466+
self._observed = observed
464467
self.in_axis = in_axis
465-
self.dropna = dropna
468+
self._dropna = dropna
466469

467470
self._passed_categorical = False
468471

@@ -471,11 +474,15 @@ def __init__(
471474

472475
ilevel = self._ilevel
473476
if ilevel is not None:
477+
mapper = self.grouping_vector
478+
# In extant tests, the new self.grouping_vector matches
479+
# `index.get_level_values(ilevel)` whenever
480+
# mapper is None and isinstance(index, MultiIndex)
474481
(
475482
self.grouping_vector, # Index
476483
self._codes,
477484
self._group_index,
478-
) = index._get_grouper_for_level(self.grouping_vector, ilevel)
485+
) = index._get_grouper_for_level(mapper, ilevel)
479486

480487
# a passed Grouper like, directly get the grouper in the same way
481488
# as single grouper groupby, use the group_info to get codes
@@ -505,8 +512,8 @@ def __init__(
505512
# a passed Categorical
506513
self._passed_categorical = True
507514

508-
self.grouping_vector, self.all_grouper = recode_for_groupby(
509-
self.grouping_vector, self.sort, observed
515+
self.grouping_vector, self._all_grouper = recode_for_groupby(
516+
self.grouping_vector, sort, observed
510517
)
511518

512519
elif not isinstance(
@@ -517,11 +524,11 @@ def __init__(
517524
t = self.name or str(type(self.grouping_vector))
518525
raise ValueError(f"Grouper for '{t}' not 1-dimensional")
519526

520-
self.grouping_vector = self.index.map(self.grouping_vector)
527+
self.grouping_vector = index.map(self.grouping_vector)
521528

522529
if not (
523530
hasattr(self.grouping_vector, "__len__")
524-
and len(self.grouping_vector) == len(self.index)
531+
and len(self.grouping_vector) == len(index)
525532
):
526533
grper = pprint_thing(self.grouping_vector)
527534
errmsg = (
@@ -546,7 +553,7 @@ def __iter__(self):
546553
def name(self) -> Hashable:
547554
ilevel = self._ilevel
548555
if ilevel is not None:
549-
return self.index.names[ilevel]
556+
return self._index.names[ilevel]
550557

551558
if isinstance(self._orig_grouper, (Index, Series)):
552559
return self._orig_grouper.name
@@ -569,7 +576,7 @@ def _ilevel(self) -> int | None:
569576
if level is None:
570577
return None
571578
if not isinstance(level, int):
572-
index = self.index
579+
index = self._index
573580
if level not in index.names:
574581
raise AssertionError(f"Level {level} not in index")
575582
return index.names.index(level)
@@ -607,10 +614,10 @@ def group_arraylike(self) -> ArrayLike:
607614
@cache_readonly
608615
def result_index(self) -> Index:
609616
# TODO: what's the difference between result_index vs group_index?
610-
if self.all_grouper is not None:
617+
if self._all_grouper is not None:
611618
group_idx = self.group_index
612619
assert isinstance(group_idx, CategoricalIndex)
613-
return recode_from_groupby(self.all_grouper, self.sort, group_idx)
620+
return recode_from_groupby(self._all_grouper, self._sort, group_idx)
614621
return self.group_index
615622

616623
@cache_readonly
@@ -629,10 +636,10 @@ def _codes_and_uniques(self) -> tuple[np.ndarray, ArrayLike]:
629636
cat = self.grouping_vector
630637
categories = cat.categories
631638

632-
if self.observed:
639+
if self._observed:
633640
ucodes = algorithms.unique1d(cat.codes)
634641
ucodes = ucodes[ucodes != -1]
635-
if self.sort or cat.ordered:
642+
if self._sort or cat.ordered:
636643
ucodes = np.sort(ucodes)
637644
else:
638645
ucodes = np.arange(len(categories))
@@ -648,18 +655,18 @@ def _codes_and_uniques(self) -> tuple[np.ndarray, ArrayLike]:
648655
uniques = self.grouping_vector.result_arraylike
649656
else:
650657
# GH35667, replace dropna=False with na_sentinel=None
651-
if not self.dropna:
658+
if not self._dropna:
652659
na_sentinel = None
653660
else:
654661
na_sentinel = -1
655662
codes, uniques = algorithms.factorize(
656-
self.grouping_vector, sort=self.sort, na_sentinel=na_sentinel
663+
self.grouping_vector, sort=self._sort, na_sentinel=na_sentinel
657664
)
658665
return codes, uniques
659666

660667
@cache_readonly
661668
def groups(self) -> dict[Hashable, np.ndarray]:
662-
return self.index.groupby(Categorical.from_codes(self.codes, self.group_index))
669+
return self._index.groupby(Categorical.from_codes(self.codes, self.group_index))
663670

664671

665672
def get_grouper(

pandas/core/groupby/ops.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -682,7 +682,7 @@ def __init__(
682682

683683
self.axis = axis
684684
self._groupings: list[grouper.Grouping] = list(groupings)
685-
self.sort = sort
685+
self._sort = sort
686686
self.group_keys = group_keys
687687
self.mutated = mutated
688688
self.indexer = indexer
@@ -895,7 +895,7 @@ def codes_info(self) -> np.ndarray:
895895
def _get_compressed_codes(self) -> tuple[np.ndarray, np.ndarray]:
896896
if len(self.groupings) > 1:
897897
group_index = get_group_index(self.codes, self.shape, sort=True, xnull=True)
898-
return compress_group_index(group_index, sort=self.sort)
898+
return compress_group_index(group_index, sort=self._sort)
899899

900900
ping = self.groupings[0]
901901
return ping.codes, np.arange(len(ping.group_index))

0 commit comments

Comments
 (0)