Skip to content

Commit dc947a4

Browse files
authored
REF: combine groupby helpers (pandas-dev#51516)
* REF: simplify _apply_to_column_groupbys, rename apply->apply_groupwise
* REF: consolidate self.as_index check
* REF: remove unused _iterate_slices
1 parent 2deba19 commit dc947a4

File tree

3 files changed

+28 -43 lines changed

3 files changed

+28 -43 lines changed

pandas/core/groupby/generic.py

+25 -27
Original file line number | Diff line number | Diff line change
@@ -154,9 +154,6 @@ def _get_data_to_aggregate(
154154
)
155155
return single
156156

157-
def _iterate_slices(self) -> Iterable[Series]:
158-
yield self._selected_obj
159-
160157
_agg_examples_doc = dedent(
161158
"""
162159
Examples
@@ -408,7 +405,9 @@ def _aggregate_named(self, func, *args, **kwargs):
408405
result = {}
409406
initialized = False
410407

411-
for name, group in self:
408+
for name, group in self.grouper.get_iterator(
409+
self._selected_obj, axis=self.axis
410+
):
412411
object.__setattr__(group, "name", name)
413412

414413
output = func(group, *args, **kwargs)
@@ -568,7 +567,11 @@ def true_and_notna(x) -> bool:
568567

569568
try:
570569
indices = [
571-
self._get_index(name) for name, group in self if true_and_notna(group)
570+
self._get_index(name)
571+
for name, group in self.grouper.get_iterator(
572+
self._selected_obj, axis=self.axis
573+
)
574+
if true_and_notna(group)
572575
]
573576
except (ValueError, TypeError) as err:
574577
raise TypeError("the filter must return a boolean result") from err
@@ -1850,29 +1853,33 @@ def _indexed_output_to_ndframe(
18501853
def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame:
18511854
return self.obj._constructor(mgr)
18521855

1853-
def _iterate_column_groupbys(self, obj: DataFrame):
1854-
for i, colname in enumerate(obj.columns):
1855-
yield colname, SeriesGroupBy(
1856+
def _apply_to_column_groupbys(self, func) -> DataFrame:
1857+
from pandas.core.reshape.concat import concat
1858+
1859+
obj = self._obj_with_exclusions
1860+
columns = obj.columns
1861+
sgbs = [
1862+
SeriesGroupBy(
18561863
obj.iloc[:, i],
18571864
selection=colname,
18581865
grouper=self.grouper,
18591866
exclusions=self.exclusions,
18601867
observed=self.observed,
18611868
)
1862-
1863-
def _apply_to_column_groupbys(self, func, obj: DataFrame) -> DataFrame:
1864-
from pandas.core.reshape.concat import concat
1865-
1866-
columns = obj.columns
1867-
results = [
1868-
func(col_groupby) for _, col_groupby in self._iterate_column_groupbys(obj)
1869+
for i, colname in enumerate(obj.columns)
18691870
]
1871+
results = [func(sgb) for sgb in sgbs]
18701872

18711873
if not len(results):
18721874
# concat would raise
1873-
return DataFrame([], columns=columns, index=self.grouper.result_index)
1875+
res_df = DataFrame([], columns=columns, index=self.grouper.result_index)
18741876
else:
1875-
return concat(results, keys=columns, axis=1)
1877+
res_df = concat(results, keys=columns, axis=1)
1878+
1879+
if not self.as_index:
1880+
res_df.index = default_index(len(res_df))
1881+
res_df = self._insert_inaxis_grouper(res_df)
1882+
return res_df
18761883

18771884
def nunique(self, dropna: bool = True) -> DataFrame:
18781885
"""
@@ -1925,16 +1932,7 @@ def nunique(self, dropna: bool = True) -> DataFrame:
19251932
lambda sgb: sgb.nunique(dropna), self._obj_with_exclusions, is_agg=True
19261933
)
19271934

1928-
obj = self._obj_with_exclusions
1929-
results = self._apply_to_column_groupbys(
1930-
lambda sgb: sgb.nunique(dropna), obj=obj
1931-
)
1932-
1933-
if not self.as_index:
1934-
results.index = default_index(len(results))
1935-
results = self._insert_inaxis_grouper(results)
1936-
1937-
return results
1935+
return self._apply_to_column_groupbys(lambda sgb: sgb.nunique(dropna))
19381936

19391937
def idxmax(
19401938
self,

pandas/core/groupby/groupby.py

+2 -15
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,6 @@ class providing the base-class of operations.
1919
TYPE_CHECKING,
2020
Callable,
2121
Hashable,
22-
Iterable,
2322
Iterator,
2423
List,
2524
Literal,
@@ -990,12 +989,6 @@ def curried(x):
990989
result = self._set_result_index_ordered(result)
991990
return result
992991

993-
# -----------------------------------------------------------------
994-
# Selection
995-
996-
def _iterate_slices(self) -> Iterable[Series]:
997-
raise AbstractMethodError(self)
998-
999992
# -----------------------------------------------------------------
1000993
# Dispatch/Wrapping
1001994

@@ -1398,7 +1391,7 @@ def _python_apply_general(
13981391
Series or DataFrame
13991392
data after applying f
14001393
"""
1401-
values, mutated = self.grouper.apply(f, data, self.axis)
1394+
values, mutated = self.grouper.apply_groupwise(f, data, self.axis)
14021395
if not_indexed_same is None:
14031396
not_indexed_same = mutated
14041397

@@ -2462,7 +2455,6 @@ def ohlc(self) -> DataFrame:
24622455
Open, high, low and close values within each group.
24632456
"""
24642457
if self.obj.ndim == 1:
2465-
# self._iterate_slices() yields only self._selected_obj
24662458
obj = self._selected_obj
24672459

24682460
is_numeric = is_numeric_dtype(obj.dtype)
@@ -2479,12 +2471,7 @@ def ohlc(self) -> DataFrame:
24792471
)
24802472
return self._reindex_output(result)
24812473

2482-
result = self._apply_to_column_groupbys(
2483-
lambda x: x.ohlc(), self._obj_with_exclusions
2484-
)
2485-
if not self.as_index:
2486-
result = self._insert_inaxis_grouper(result)
2487-
result.index = default_index(len(result))
2474+
result = self._apply_to_column_groupbys(lambda sgb: sgb.ohlc())
24882475
return result
24892476

24902477
@doc(DataFrame.describe)

pandas/core/groupby/ops.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -748,7 +748,7 @@ def group_keys_seq(self):
748748
return get_flattened_list(ids, ngroups, self.levels, self.codes)
749749

750750
@final
751-
def apply(
751+
def apply_groupwise(
752752
self, f: Callable, data: DataFrame | Series, axis: AxisInt = 0
753753
) -> tuple[list, bool]:
754754
mutated = False

0 commit comments

Comments
 (0)