From f97361b7fe2ce81c7abc54f3906ad7826449fed6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 15 May 2020 19:56:08 -0700 Subject: [PATCH 1/2] PERF: speedup non-fastpath in groupby ops --- pandas/core/groupby/ops.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 597a160995eef..6fc7772cf439f 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -167,7 +167,7 @@ def apply(self, f: F, data: FrameOrSeries, axis: int = 0): # TODO: can we have a workaround for EAs backed by ndarray? pass - elif ( + elif False and ( com.get_callable_name(f) not in base.plotting_methods and isinstance(splitter, FrameSplitter) and axis == 0 @@ -952,7 +952,9 @@ def _chop(self, sdata, slice_obj: slice) -> NDFrame: class SeriesSplitter(DataSplitter): def _chop(self, sdata: Series, slice_obj: slice) -> Series: - return sdata.iloc[slice_obj] + # fastpath equivalent to `sdata.iloc[slice_obj]` + mgr = sdata._mgr.get_slice(slice_obj) + return type(sdata)(mgr, name=sdata.name, fastpath=True) class FrameSplitter(DataSplitter): @@ -962,10 +964,13 @@ def fast_apply(self, f: F, sdata: FrameOrSeries, names): return libreduction.apply_frame_axis0(sdata, f, names, starts, ends) def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame: - if self.axis == 0: - return sdata.iloc[slice_obj] - else: - return sdata.iloc[:, slice_obj] + # Fastpath equivalent to: + # if self.axis == 0: + # return sdata.iloc[slice_obj] + # else: + # return sdata.iloc[:, slice_obj] + mgr = sdata._mgr.get_slice(slice_obj, axis=1 - self.axis) + return type(sdata)(mgr) def get_splitter( From f143ed22c723bca0004612c5cfd930620e75e113 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 16 May 2020 08:49:05 -0700 Subject: [PATCH 2/2] PERF: make_block_same_class --- pandas/core/groupby/ops.py | 2 +- pandas/core/internals/blocks.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 6fc7772cf439f..74db87f46c5e2 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -167,7 +167,7 @@ def apply(self, f: F, data: FrameOrSeries, axis: int = 0): # TODO: can we have a workaround for EAs backed by ndarray? pass - elif False and ( + elif ( com.get_callable_name(f) not in base.plotting_methods and isinstance(splitter, FrameSplitter) and axis == 0 diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 3e2b5bdccd5d1..c052c6c9d7d1d 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -251,7 +251,7 @@ def make_block_same_class(self, values, placement=None, ndim=None): placement = self.mgr_locs if ndim is None: ndim = self.ndim - return make_block(values, placement=placement, ndim=ndim, klass=type(self)) + return type(self)(values, placement=placement, ndim=ndim) def __repr__(self) -> str: # don't want to print out all of the items here