Skip to content

Commit 3022b62

Browse files
jbrockmendel authored and
TLouf committed
REF: Grouping.grouper -> Grouping.grouping_vector (pandas-dev#41532)
1 parent 1a34d8f commit 3022b62

File tree

6 files changed

+42
-40
lines changed

6 files changed

+42
-40
lines changed

pandas/core/groupby/groupby.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -3064,7 +3064,7 @@ def _reindex_output(
30643064

30653065
# reindexing only applies to a Categorical grouper
30663066
elif not any(
3067-
isinstance(ping.grouper, (Categorical, CategoricalIndex))
3067+
isinstance(ping.grouping_vector, (Categorical, CategoricalIndex))
30683068
for ping in groupings
30693069
):
30703070
return output

pandas/core/groupby/grouper.py

+34 -32
Original file line number | Diff line number | Diff line change
@@ -455,7 +455,7 @@ def __init__(
455455
):
456456
self.level = level
457457
self._orig_grouper = grouper
458-
self.grouper = _convert_grouper(index, grouper)
458+
self.grouping_vector = _convert_grouper(index, grouper)
459459
self.all_grouper = None
460460
self.index = index
461461
self.sort = sort
@@ -472,19 +472,19 @@ def __init__(
472472
ilevel = self._ilevel
473473
if ilevel is not None:
474474
(
475-
self.grouper, # Index
475+
self.grouping_vector, # Index
476476
self._codes,
477477
self._group_index,
478-
) = index._get_grouper_for_level(self.grouper, ilevel)
478+
) = index._get_grouper_for_level(self.grouping_vector, ilevel)
479479

480480
# a passed Grouper like, directly get the grouper in the same way
481481
# as single grouper groupby, use the group_info to get codes
482-
elif isinstance(self.grouper, Grouper):
482+
elif isinstance(self.grouping_vector, Grouper):
483483
# get the new grouper; we already have disambiguated
484484
# what key/level refer to exactly, don't need to
485485
# check again as we have by this point converted these
486486
# to an actual value (rather than a pd.Grouper)
487-
_, newgrouper, newobj = self.grouper._get_grouper(
487+
_, newgrouper, newobj = self.grouping_vector._get_grouper(
488488
# error: Value of type variable "FrameOrSeries" of "_get_grouper"
489489
# of "Grouper" cannot be "Optional[FrameOrSeries]"
490490
self.obj, # type: ignore[type-var]
@@ -495,44 +495,46 @@ def __init__(
495495
ng = newgrouper._get_grouper()
496496
if isinstance(newgrouper, ops.BinGrouper):
497497
# in this case we have `ng is newgrouper`
498-
self.grouper = ng
498+
self.grouping_vector = ng
499499
else:
500500
# ops.BaseGrouper
501501
# use Index instead of ndarray so we can recover the name
502-
self.grouper = Index(ng, name=newgrouper.result_index.name)
502+
self.grouping_vector = Index(ng, name=newgrouper.result_index.name)
503503

504-
elif is_categorical_dtype(self.grouper):
504+
elif is_categorical_dtype(self.grouping_vector):
505505
# a passed Categorical
506506
self._passed_categorical = True
507507

508-
self.grouper, self.all_grouper = recode_for_groupby(
509-
self.grouper, self.sort, observed
508+
self.grouping_vector, self.all_grouper = recode_for_groupby(
509+
self.grouping_vector, self.sort, observed
510510
)
511511

512-
elif not isinstance(self.grouper, (Series, Index, ExtensionArray, np.ndarray)):
512+
elif not isinstance(
513+
self.grouping_vector, (Series, Index, ExtensionArray, np.ndarray)
514+
):
513515
# no level passed
514-
if getattr(self.grouper, "ndim", 1) != 1:
515-
t = self.name or str(type(self.grouper))
516+
if getattr(self.grouping_vector, "ndim", 1) != 1:
517+
t = self.name or str(type(self.grouping_vector))
516518
raise ValueError(f"Grouper for '{t}' not 1-dimensional")
517519

518-
self.grouper = self.index.map(self.grouper)
520+
self.grouping_vector = self.index.map(self.grouping_vector)
519521

520522
if not (
521-
hasattr(self.grouper, "__len__")
522-
and len(self.grouper) == len(self.index)
523+
hasattr(self.grouping_vector, "__len__")
524+
and len(self.grouping_vector) == len(self.index)
523525
):
524-
grper = pprint_thing(self.grouper)
526+
grper = pprint_thing(self.grouping_vector)
525527
errmsg = (
526528
"Grouper result violates len(labels) == "
527529
f"len(data)\nresult: {grper}"
528530
)
529-
self.grouper = None # Try for sanity
531+
self.grouping_vector = None # Try for sanity
530532
raise AssertionError(errmsg)
531533

532-
if isinstance(self.grouper, np.ndarray):
534+
if isinstance(self.grouping_vector, np.ndarray):
533535
# if we have a date/time-like grouper, make sure that we have
534536
# Timestamps like
535-
self.grouper = sanitize_to_nanoseconds(self.grouper)
537+
self.grouping_vector = sanitize_to_nanoseconds(self.grouping_vector)
536538

537539
def __repr__(self) -> str:
538540
return f"Grouping({self.name})"
@@ -549,11 +551,11 @@ def name(self) -> Hashable:
549551
if isinstance(self._orig_grouper, (Index, Series)):
550552
return self._orig_grouper.name
551553

552-
elif isinstance(self.grouper, ops.BaseGrouper):
553-
return self.grouper.result_index.name
554+
elif isinstance(self.grouping_vector, ops.BaseGrouper):
555+
return self.grouping_vector.result_index.name
554556

555-
elif isinstance(self.grouper, Index):
556-
return self.grouper.name
557+
elif isinstance(self.grouping_vector, Index):
558+
return self.grouping_vector.name
557559

558560
# otherwise we have ndarray or ExtensionArray -> no name
559561
return None
@@ -580,10 +582,10 @@ def ngroups(self) -> int:
580582
@cache_readonly
581583
def indices(self):
582584
# we have a list of groupers
583-
if isinstance(self.grouper, ops.BaseGrouper):
584-
return self.grouper.indices
585+
if isinstance(self.grouping_vector, ops.BaseGrouper):
586+
return self.grouping_vector.indices
585587

586-
values = Categorical(self.grouper)
588+
values = Categorical(self.grouping_vector)
587589
return values._reverse_indexer()
588590

589591
@property
@@ -624,7 +626,7 @@ def _codes_and_uniques(self) -> tuple[np.ndarray, ArrayLike]:
624626
if self._passed_categorical:
625627
# we make a CategoricalIndex out of the cat grouper
626628
# preserving the categories / ordered attributes
627-
cat = self.grouper
629+
cat = self.grouping_vector
628630
categories = cat.categories
629631

630632
if self.observed:
@@ -640,18 +642,18 @@ def _codes_and_uniques(self) -> tuple[np.ndarray, ArrayLike]:
640642
)
641643
return cat.codes, uniques
642644

643-
elif isinstance(self.grouper, ops.BaseGrouper):
645+
elif isinstance(self.grouping_vector, ops.BaseGrouper):
644646
# we have a list of groupers
645-
codes = self.grouper.codes_info
646-
uniques = self.grouper.result_arraylike
647+
codes = self.grouping_vector.codes_info
648+
uniques = self.grouping_vector.result_arraylike
647649
else:
648650
# GH35667, replace dropna=False with na_sentinel=None
649651
if not self.dropna:
650652
na_sentinel = None
651653
else:
652654
na_sentinel = -1
653655
codes, uniques = algorithms.factorize(
654-
self.grouper, sort=self.sort, na_sentinel=na_sentinel
656+
self.grouping_vector, sort=self.sort, na_sentinel=na_sentinel
655657
)
656658
return codes, uniques
657659

pandas/core/groupby/ops.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -734,7 +734,7 @@ def _get_grouper(self):
734734
We have a specific method of grouping, so cannot
735735
convert to a Index for our grouper.
736736
"""
737-
return self.groupings[0].grouper
737+
return self.groupings[0].grouping_vector
738738

739739
@final
740740
def _get_group_keys(self):
@@ -858,7 +858,7 @@ def groups(self) -> dict[Hashable, np.ndarray]:
858858
if len(self.groupings) == 1:
859859
return self.groupings[0].groups
860860
else:
861-
to_groupby = zip(*(ping.grouper for ping in self.groupings))
861+
to_groupby = zip(*(ping.grouping_vector for ping in self.groupings))
862862
index = Index(to_groupby)
863863
return self.axis.groupby(index)
864864

pandas/tests/extension/base/groupby.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,8 @@ def test_grouping_grouper(self, data_for_grouping):
1515
gr1 = df.groupby("A").grouper.groupings[0]
1616
gr2 = df.groupby("B").grouper.groupings[0]
1717

18-
tm.assert_numpy_array_equal(gr1.grouper, df.A.values)
19-
tm.assert_extension_array_equal(gr2.grouper, data_for_grouping)
18+
tm.assert_numpy_array_equal(gr1.grouping_vector, df.A.values)
19+
tm.assert_extension_array_equal(gr2.grouping_vector, data_for_grouping)
2020

2121
@pytest.mark.parametrize("as_index", [True, False])
2222
def test_groupby_extension_agg(self, as_index, data_for_grouping):

pandas/tests/extension/test_boolean.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -262,8 +262,8 @@ def test_grouping_grouper(self, data_for_grouping):
262262
gr1 = df.groupby("A").grouper.groupings[0]
263263
gr2 = df.groupby("B").grouper.groupings[0]
264264

265-
tm.assert_numpy_array_equal(gr1.grouper, df.A.values)
266-
tm.assert_extension_array_equal(gr2.grouper, data_for_grouping)
265+
tm.assert_numpy_array_equal(gr1.grouping_vector, df.A.values)
266+
tm.assert_extension_array_equal(gr2.grouping_vector, data_for_grouping)
267267

268268
@pytest.mark.parametrize("as_index", [True, False])
269269
def test_groupby_extension_agg(self, as_index, data_for_grouping):

pandas/tests/groupby/aggregate/test_aggregate.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -161,7 +161,7 @@ def test_agg_grouping_is_list_tuple(ts):
161161
df = tm.makeTimeDataFrame()
162162

163163
grouped = df.groupby(lambda x: x.year)
164-
grouper = grouped.grouper.groupings[0].grouper
164+
grouper = grouped.grouper.groupings[0].grouping_vector
165165
grouped.grouper.groupings[0] = Grouping(ts.index, list(grouper))
166166

167167
result = grouped.agg(np.mean)

0 commit comments

Comments
 (0)