From 850e68906e410a1053c1ada552b297ae45e1f373 Mon Sep 17 00:00:00 2001
From: Brock
Date: Fri, 10 Sep 2021 13:10:35 -0700
Subject: [PATCH 1/2] REF: de-duplicate operate-column-wise code

---
 pandas/core/groupby/generic.py | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 1cbfcff5e94f1..dbb640a5bdf19 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1084,14 +1084,10 @@ def _aggregate_item_by_item(self, func, *args, **kwargs) -> DataFrame:
         # test_resample_apply_product
         obj = self._obj_with_exclusions
 
-        result: dict[int | str, NDFrame] = {}
-        for i, item in enumerate(obj):
-            ser = obj.iloc[:, i]
-            colg = SeriesGroupBy(
-                ser, selection=item, grouper=self.grouper, exclusions=self.exclusions
-            )
+        result: dict[int, NDFrame] = {}
 
-            result[i] = colg.aggregate(func, *args, **kwargs)
+        for i, (item, sgb) in enumerate(self._iterate_column_groupbys(obj)):
+            result[i] = sgb.aggregate(func, *args, **kwargs)
 
         res_df = self.obj._constructor(result)
         res_df.columns = obj.columns
@@ -1370,14 +1366,7 @@ def _transform_item_by_item(self, obj: DataFrame, wrapper) -> DataFrame:
         # gets here with non-unique columns
         output = {}
         inds = []
-        for i, col in enumerate(obj):
-            subset = obj.iloc[:, i]
-            sgb = SeriesGroupBy(
-                subset,
-                selection=col,
-                grouper=self.grouper,
-                exclusions=self.exclusions,
-            )
+        for i, (colname, sgb) in enumerate(self._iterate_column_groupbys(obj)):
             try:
                 output[i] = sgb.transform(wrapper)
             except TypeError:
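Note (not part of the diff): both call sites above now delegate to self._iterate_column_groupbys(obj), whose definition is not shown in this patch. Judging from the inline code it replaces, the helper presumably yields one (column name, SeriesGroupBy) pair per column of the selected frame, along the lines of the sketch below (names and body inferred from the removed code, not copied from pandas):

    def _iterate_column_groupbys(self, obj):
        # Inferred sketch: wrap each column of obj in its own SeriesGroupBy,
        # reusing this groupby's grouper and exclusions, and yield it together
        # with the column name so callers can build their result dicts.
        for i, colname in enumerate(obj.columns):
            yield colname, SeriesGroupBy(
                obj.iloc[:, i],
                selection=colname,
                grouper=self.grouper,
                exclusions=self.exclusions,
            )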
""" + # TODO: Remove this conditional for SeriesGroupBy when GH#23918 is fixed if freq is not None or axis != 0: return self.apply( lambda x: x.pct_change(