diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e92059c552b65..db53db4b7617e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -469,6 +469,32 @@ def _validate_dtype(cls, dtype) -> DtypeObj | None: return dtype + @property + def _can_fast_construct(self) -> bool_t: + """ + Check if we can avoid _constructor lookup and __finalize__ calls. + """ + # GH#46505 repeated __finalize__ calls caused perf regression, + # see if we can avoid those in some cases. + from pandas import ( + DataFrame, + Series, + ) + + if self.attrs: + # __finalize__ is not a no-op + return False + + if type(self) is Series: + if self._metadata == ["name"]: + return True + return False + elif type(self) is DataFrame: + return not self._metadata + + # subclass, may have overridden _constructor -> cannot fast-construct + return False + # ---------------------------------------------------------------------- # Construction diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index bff61ec135d74..2acd09c506e62 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -1257,8 +1257,13 @@ def __iter__(self) -> Iterator: starts, ends = lib.generate_slices(self._slabels, self.ngroups) + if sdata._can_fast_construct: + chop = self._chop_fast + else: + chop = self._chop + for start, end in zip(starts, ends): - yield self._chop(sdata, slice(start, end)) + yield chop(sdata, slice(start, end)) @cache_readonly def _sorted_data(self) -> NDFrameT: @@ -1267,6 +1272,9 @@ def _sorted_data(self) -> NDFrameT: def _chop(self, sdata, slice_obj: slice) -> NDFrame: raise AbstractMethodError(self) + def _chop_fast(self, sdata, slice_obj: slice) -> NDFrame: + raise AbstractMethodError(self) + class SeriesSplitter(DataSplitter): def _chop(self, sdata: Series, slice_obj: slice) -> Series: @@ -1275,6 +1283,12 @@ def _chop(self, sdata: Series, slice_obj: slice) -> Series: ser = sdata._constructor(mgr, name=sdata.name, fastpath=True) return 
ser.__finalize__(sdata, method="groupby") + def _chop_fast(self, sdata: Series, slice_obj: slice) -> Series: + # _chop specialized to the case where sdata._can_fast_construct is True + mgr = sdata._mgr.get_slice(slice_obj) + ser = Series(mgr, name=sdata.name, fastpath=True) + return ser + class FrameSplitter(DataSplitter): def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame: @@ -1287,6 +1301,12 @@ def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame: df = sdata._constructor(mgr) return df.__finalize__(sdata, method="groupby") + def _chop_fast(self, sdata: DataFrame, slice_obj: slice) -> DataFrame: + # _chop specialized to the case where sdata._can_fast_construct is True + mgr = sdata._mgr.get_slice(slice_obj, axis=1 - self.axis) + df = DataFrame(mgr) + return df + def get_splitter( data: NDFrame, labels: np.ndarray, ngroups: int, axis: AxisInt = 0