Skip to content

Commit ac170fd

Browse files
authored
DEPR: groupby.grouper (pandas-dev#56521)
* DEPR: groupby.grouper * DEPR: groupby.grouper * fix whatsnew, tests * Restore test
1 parent 6ee9ad0 commit ac170fd

File tree

17 files changed

+183
-192
lines changed

17 files changed

+183
-192
lines changed

doc/source/whatsnew/v2.2.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -477,7 +477,7 @@ Other Deprecations
477477
- Deprecated strings ``H``, ``S``, ``U``, and ``N`` denoting units in :func:`to_timedelta` (:issue:`52536`)
478478
- Deprecated strings ``H``, ``T``, ``S``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`52536`)
479479
- Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`)
480-
- Deprecated the :class:`.BaseGrouper` attributes ``group_keys_seq`` and ``reconstructed_codes``; these will be removed in a future version of pandas (:issue:`56148`)
480+
- Deprecated the :attr:`.DataFrameGroupBy.grouper` and :attr:`.SeriesGroupBy.grouper` attributes; these will be removed in a future version of pandas (:issue:`56521`)
481481
- Deprecated the :class:`.Grouping` attributes ``group_index``, ``result_index``, and ``group_arraylike``; these will be removed in a future version of pandas (:issue:`56148`)
482482
- Deprecated the ``errors="ignore"`` option in :func:`to_datetime`, :func:`to_timedelta`, and :func:`to_numeric`; explicitly catch exceptions instead (:issue:`54467`)
483483
- Deprecated the ``fastpath`` keyword in the :class:`Series` constructor (:issue:`20110`)

pandas/core/frame.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -7452,7 +7452,7 @@ def value_counts(
74527452
subset = self.columns.tolist()
74537453

74547454
name = "proportion" if normalize else "count"
7455-
counts = self.groupby(subset, dropna=dropna, observed=False).grouper.size()
7455+
counts = self.groupby(subset, dropna=dropna, observed=False)._grouper.size()
74567456
counts.name = name
74577457

74587458
if sort:

pandas/core/groupby/generic.py

+32-32
Original file line numberDiff line numberDiff line change
@@ -283,11 +283,11 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
283283
return self.obj._constructor(
284284
[],
285285
name=self.obj.name,
286-
index=self.grouper.result_index,
286+
index=self._grouper.result_index,
287287
dtype=obj.dtype,
288288
)
289289

290-
if self.grouper.nkeys > 1:
290+
if self._grouper.nkeys > 1:
291291
return self._python_agg_general(func, *args, **kwargs)
292292

293293
try:
@@ -309,7 +309,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
309309
)
310310

311311
# result is a dict whose keys are the elements of result_index
312-
result = Series(result, index=self.grouper.result_index)
312+
result = Series(result, index=self._grouper.result_index)
313313
result = self._wrap_aggregated_output(result)
314314
return result
315315

@@ -324,7 +324,7 @@ def _python_agg_general(self, func, *args, **kwargs):
324324
f = lambda x: func(x, *args, **kwargs)
325325

326326
obj = self._obj_with_exclusions
327-
result = self.grouper.agg_series(obj, f)
327+
result = self._grouper.agg_series(obj, f)
328328
res = obj._constructor(result, name=obj.name)
329329
return self._wrap_aggregated_output(res)
330330

@@ -404,7 +404,7 @@ def _wrap_applied_output(
404404
# GH#47787 see test_group_on_empty_multiindex
405405
res_index = data.index
406406
else:
407-
res_index = self.grouper.result_index
407+
res_index = self._grouper.result_index
408408

409409
return self.obj._constructor(
410410
[],
@@ -416,7 +416,7 @@ def _wrap_applied_output(
416416

417417
if isinstance(values[0], dict):
418418
# GH #823 #24880
419-
index = self.grouper.result_index
419+
index = self._grouper.result_index
420420
res_df = self.obj._constructor_expanddim(values, index=index)
421421
res_df = self._reindex_output(res_df)
422422
# if self.observed is False,
@@ -439,7 +439,7 @@ def _wrap_applied_output(
439439
else:
440440
# GH #6265 #24880
441441
result = self.obj._constructor(
442-
data=values, index=self.grouper.result_index, name=self.obj.name
442+
data=values, index=self._grouper.result_index, name=self.obj.name
443443
)
444444
if not self.as_index:
445445
result = self._insert_inaxis_grouper(result)
@@ -452,7 +452,7 @@ def _aggregate_named(self, func, *args, **kwargs):
452452
result = {}
453453
initialized = False
454454

455-
for name, group in self.grouper.get_iterator(
455+
for name, group in self._grouper.get_iterator(
456456
self._obj_with_exclusions, axis=self.axis
457457
):
458458
# needed for pandas/tests/groupby/test_groupby.py::test_basic_aggregations
@@ -526,7 +526,7 @@ def _cython_transform(
526526
obj = self._obj_with_exclusions
527527

528528
try:
529-
result = self.grouper._cython_operation(
529+
result = self._grouper._cython_operation(
530530
"transform", obj._values, how, axis, **kwargs
531531
)
532532
except NotImplementedError as err:
@@ -549,7 +549,7 @@ def _transform_general(
549549
klass = type(self.obj)
550550

551551
results = []
552-
for name, group in self.grouper.get_iterator(
552+
for name, group in self._grouper.get_iterator(
553553
self._obj_with_exclusions, axis=self.axis
554554
):
555555
# this setattr is needed for test_transform_lambda_with_datetimetz
@@ -621,7 +621,7 @@ def true_and_notna(x) -> bool:
621621
try:
622622
indices = [
623623
self._get_index(name)
624-
for name, group in self.grouper.get_iterator(
624+
for name, group in self._grouper.get_iterator(
625625
self._obj_with_exclusions, axis=self.axis
626626
)
627627
if true_and_notna(group)
@@ -673,11 +673,11 @@ def nunique(self, dropna: bool = True) -> Series | DataFrame:
673673
2023-02-01 1
674674
Freq: MS, dtype: int64
675675
"""
676-
ids, _, ngroups = self.grouper.group_info
676+
ids, _, ngroups = self._grouper.group_info
677677
val = self.obj._values
678678
codes, uniques = algorithms.factorize(val, use_na_sentinel=dropna, sort=False)
679679

680-
if self.grouper.has_dropped_na:
680+
if self._grouper.has_dropped_na:
681681
mask = ids >= 0
682682
ids = ids[mask]
683683
codes = codes[mask]
@@ -699,7 +699,7 @@ def nunique(self, dropna: bool = True) -> Series | DataFrame:
699699
res = np.bincount(ids[~mask], minlength=ngroups)
700700
res = ensure_int64(res)
701701

702-
ri = self.grouper.result_index
702+
ri = self._grouper.result_index
703703
result: Series | DataFrame = self.obj._constructor(
704704
res, index=ri, name=self.obj.name
705705
)
@@ -734,10 +734,10 @@ def value_counts(
734734
from pandas.core.reshape.merge import get_join_indexers
735735
from pandas.core.reshape.tile import cut
736736

737-
ids, _, _ = self.grouper.group_info
737+
ids, _, _ = self._grouper.group_info
738738
val = self.obj._values
739739

740-
index_names = self.grouper.names + [self.obj.name]
740+
index_names = self._grouper.names + [self.obj.name]
741741

742742
if isinstance(val.dtype, CategoricalDtype) or (
743743
bins is not None and not np.iterable(bins)
@@ -804,9 +804,9 @@ def value_counts(
804804
rep = partial(np.repeat, repeats=np.add.reduceat(inc, idx))
805805

806806
# multi-index components
807-
codes = self.grouper._reconstructed_codes
807+
codes = self._grouper.reconstructed_codes
808808
codes = [rep(level_codes) for level_codes in codes] + [llab(lab, inc)]
809-
levels = [ping._group_index for ping in self.grouper.groupings] + [lev]
809+
levels = [ping._group_index for ping in self._grouper.groupings] + [lev]
810810

811811
if dropna:
812812
mask = codes[-1] != -1
@@ -1461,7 +1461,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
14611461
func, *args, engine_kwargs=engine_kwargs, **kwargs
14621462
)
14631463
# grouper specific aggregations
1464-
if self.grouper.nkeys > 1:
1464+
if self._grouper.nkeys > 1:
14651465
# test_groupby_as_index_series_scalar gets here with 'not self.as_index'
14661466
return self._python_agg_general(func, *args, **kwargs)
14671467
elif args or kwargs:
@@ -1529,25 +1529,25 @@ def _python_agg_general(self, func, *args, **kwargs):
15291529

15301530
output: dict[int, ArrayLike] = {}
15311531
for idx, (name, ser) in enumerate(obj.items()):
1532-
result = self.grouper.agg_series(ser, f)
1532+
result = self._grouper.agg_series(ser, f)
15331533
output[idx] = result
15341534

15351535
res = self.obj._constructor(output)
15361536
res.columns = obj.columns.copy(deep=False)
15371537
return self._wrap_aggregated_output(res)
15381538

15391539
def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame:
1540-
if self.grouper.nkeys != 1:
1540+
if self._grouper.nkeys != 1:
15411541
raise AssertionError("Number of keys must be 1")
15421542

15431543
obj = self._obj_with_exclusions
15441544

15451545
result: dict[Hashable, NDFrame | np.ndarray] = {}
1546-
for name, grp_df in self.grouper.get_iterator(obj, self.axis):
1546+
for name, grp_df in self._grouper.get_iterator(obj, self.axis):
15471547
fres = func(grp_df, *args, **kwargs)
15481548
result[name] = fres
15491549

1550-
result_index = self.grouper.result_index
1550+
result_index = self._grouper.result_index
15511551
other_ax = obj.axes[1 - self.axis]
15521552
out = self.obj._constructor(result, index=other_ax, columns=result_index)
15531553
if self.axis == 0:
@@ -1567,7 +1567,7 @@ def _wrap_applied_output(
15671567
# GH#47787 see test_group_on_empty_multiindex
15681568
res_index = data.index
15691569
else:
1570-
res_index = self.grouper.result_index
1570+
res_index = self._grouper.result_index
15711571

15721572
result = self.obj._constructor(index=res_index, columns=data.columns)
15731573
result = result.astype(data.dtypes, copy=False)
@@ -1587,7 +1587,7 @@ def _wrap_applied_output(
15871587
is_transform=is_transform,
15881588
)
15891589

1590-
key_index = self.grouper.result_index if self.as_index else None
1590+
key_index = self._grouper.result_index if self.as_index else None
15911591

15921592
if isinstance(first_not_none, (np.ndarray, Index)):
15931593
# GH#1738: values is list of arrays of unequal lengths
@@ -1693,7 +1693,7 @@ def _cython_transform(
16931693
)
16941694

16951695
def arr_func(bvalues: ArrayLike) -> ArrayLike:
1696-
return self.grouper._cython_operation(
1696+
return self._grouper._cython_operation(
16971697
"transform", bvalues, how, 1, **kwargs
16981698
)
16991699

@@ -1715,7 +1715,7 @@ def _transform_general(self, func, engine, engine_kwargs, *args, **kwargs):
17151715

17161716
applied = []
17171717
obj = self._obj_with_exclusions
1718-
gen = self.grouper.get_iterator(obj, axis=self.axis)
1718+
gen = self._grouper.get_iterator(obj, axis=self.axis)
17191719
fast_path, slow_path = self._define_paths(func, *args, **kwargs)
17201720

17211721
# Determine whether to use slow or fast path by evaluating on the first group.
@@ -1909,7 +1909,7 @@ def filter(self, func, dropna: bool = True, *args, **kwargs):
19091909
indices = []
19101910

19111911
obj = self._selected_obj
1912-
gen = self.grouper.get_iterator(obj, axis=self.axis)
1912+
gen = self._grouper.get_iterator(obj, axis=self.axis)
19131913

19141914
for name, group in gen:
19151915
# 2023-02-27 no tests are broken this pinning, but it is documented in the
@@ -1971,7 +1971,7 @@ def _gotitem(self, key, ndim: int, subset=None):
19711971
self.keys,
19721972
axis=self.axis,
19731973
level=self.level,
1974-
grouper=self.grouper,
1974+
grouper=self._grouper,
19751975
exclusions=self.exclusions,
19761976
selection=key,
19771977
as_index=self.as_index,
@@ -1987,7 +1987,7 @@ def _gotitem(self, key, ndim: int, subset=None):
19871987
subset,
19881988
self.keys,
19891989
level=self.level,
1990-
grouper=self.grouper,
1990+
grouper=self._grouper,
19911991
exclusions=self.exclusions,
19921992
selection=key,
19931993
as_index=self.as_index,
@@ -2024,7 +2024,7 @@ def _apply_to_column_groupbys(self, func) -> DataFrame:
20242024
SeriesGroupBy(
20252025
obj.iloc[:, i],
20262026
selection=colname,
2027-
grouper=self.grouper,
2027+
grouper=self._grouper,
20282028
exclusions=self.exclusions,
20292029
observed=self.observed,
20302030
)
@@ -2034,7 +2034,7 @@ def _apply_to_column_groupbys(self, func) -> DataFrame:
20342034

20352035
if not len(results):
20362036
# concat would raise
2037-
res_df = DataFrame([], columns=columns, index=self.grouper.result_index)
2037+
res_df = DataFrame([], columns=columns, index=self._grouper.result_index)
20382038
else:
20392039
res_df = concat(results, keys=columns, axis=1)
20402040

0 commit comments

Comments (0)