Skip to content

REF: remove fast_apply #32086

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 0 additions & 136 deletions pandas/_libs/reduction.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -454,142 +454,6 @@ cdef class Slider:
self.buf.strides[0] = self.orig_stride


class InvalidApply(Exception):
    """Signal that the user-supplied function raised inside the cython
    fast-apply path, so the caller should re-raise from pure Python."""


def apply_frame_axis0(object frame, object f, object names,
                      const int64_t[:] starts, const int64_t[:] ends):
    """
    Apply ``f`` to consecutive row-slices of ``frame`` along axis 0.

    Parameters
    ----------
    frame : DataFrame
        Must have a single-level index (asserted below).
    f : callable
        Applied to each chunk; any exception it raises is converted to
        InvalidApply so the caller can distinguish user errors.
    names : sequence
        names[i] is set as the ``name`` attribute of chunk i before
        calling ``f``.
    starts, ends : const int64_t[:]
        Paired boundaries; chunk i covers rows [starts[i], ends[i]).

    Returns
    -------
    results : list
        ``f``'s result for each chunk processed (may be short if the
        slow path is required after the first chunk).
    mutated : bool
        True if some result's index is not the chunk's own index.
    """
    cdef:
        BlockSlider slider
        Py_ssize_t i, n = len(starts)
        list results
        object piece
        dict item_cache

    # We have already checked that we don't have a MultiIndex before calling
    assert frame.index.nlevels == 1

    results = []

    slider = BlockSlider(frame)

    mutated = False
    item_cache = slider.dummy._item_cache
    try:
        for i in range(n):
            slider.move(starts[i], ends[i])

            # The slider re-points the underlying buffers in place, so any
            # cached column objects are stale and must be dropped.
            item_cache.clear()  # ugh
            chunk = slider.dummy
            # object.__setattr__ bypasses the frame's own __setattr__
            object.__setattr__(chunk, 'name', names[i])

            try:
                piece = f(chunk)
            except Exception:
                # We can't be more specific without knowing something about `f`
                raise InvalidApply('Let this error raise above us')

            # Need to infer if low level index slider will cause segfaults
            require_slow_apply = i == 0 and piece is chunk
            try:
                if piece.index is not chunk.index:
                    mutated = True
            except AttributeError:
                # `piece` might not have an index, could be e.g. an int
                pass

            if not is_scalar(piece):
                # Need to copy data to avoid appending references
                try:
                    piece = piece.copy(deep="all")
                except (TypeError, AttributeError):
                    piece = copy(piece)

            results.append(piece)

            # If the data was modified inplace we need to
            # take the slow path to not risk segfaults
            # we have already computed the first piece
            if require_slow_apply:
                break
    finally:
        # Always restore the original buffers, even when f raised.
        slider.reset()

    return results, mutated


cdef class BlockSlider:
    """
    Only capable of sliding on axis=0.

    Presents successive row-windows of ``frame`` through the zero-length
    ``dummy`` frame by re-pointing the underlying block buffers in place
    (no copying).  ``move`` positions the window; ``reset`` must be
    called afterwards to restore the original pointers.
    """

    cdef public:
        # frame: the full source frame; dummy: zero-row slice reused as
        # the sliding window; index: dummy's index object.
        object frame, dummy, index
        int nblocks
        Slider idx_slider
        list blocks

    cdef:
        # Each block's original data pointer, saved so reset() can
        # restore it; freed in __dealloc__.
        char **base_ptrs

    def __init__(self, frame):
        self.frame = frame
        self.dummy = frame[:0]
        self.index = self.dummy.index

        self.blocks = [b.values for b in self.dummy._data.blocks]

        for x in self.blocks:
            # move() re-points .data below, so the arrays can no longer
            # be treated as contiguous.
            util.set_array_not_contiguous(x)

        self.nblocks = len(self.blocks)
        # See the comment in indexes/base.py about _index_data.
        # We need this for EA-backed indexes that have a reference to a 1-d
        # ndarray like datetime / timedelta / period.
        self.idx_slider = Slider(
            self.frame.index._index_data, self.dummy.index._index_data)

        self.base_ptrs = <char**>malloc(sizeof(char*) * len(self.blocks))
        for i, block in enumerate(self.blocks):
            self.base_ptrs[i] = (<ndarray>block).data

    def __dealloc__(self):
        free(self.base_ptrs)

    cdef move(self, int start, int end):
        """Point dummy's buffers at rows [start, end) of the frame."""
        cdef:
            ndarray arr
            Py_ssize_t i

        # move blocks
        for i in range(self.nblocks):
            arr = self.blocks[i]

            # axis=1 is the frame's axis=0
            arr.data = self.base_ptrs[i] + arr.strides[1] * start
            arr.shape[1] = end - start

        # move and set the index
        self.idx_slider.move(start, end)

        # object.__setattr__ bypasses any attribute protection on Index
        object.__setattr__(self.index, '_index_data', self.idx_slider.buf)
        # Cached engine mappings refer to the old buffer; drop them.
        self.index._engine.clear_mapping()

    cdef reset(self):
        """Restore every block's original pointer and zero length."""
        cdef:
            ndarray arr
            Py_ssize_t i

        # reset blocks
        for i in range(self.nblocks):
            arr = self.blocks[i]

            # axis=1 is the frame's axis=0
            arr.data = self.base_ptrs[i]
            arr.shape[1] = 0


def compute_reduction(arr: np.ndarray, f, axis: int = 0, dummy=None, labels=None):
"""

Expand Down
38 changes: 1 addition & 37 deletions pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
import pandas.core.common as com
from pandas.core.frame import DataFrame
from pandas.core.generic import NDFrame
from pandas.core.groupby import base, grouper
from pandas.core.groupby import grouper
from pandas.core.indexes.api import Index, MultiIndex, ensure_index
from pandas.core.series import Series
from pandas.core.sorting import (
Expand Down Expand Up @@ -154,37 +154,6 @@ def apply(self, f, data: FrameOrSeries, axis: int = 0):
group_keys = self._get_group_keys()
result_values = None

sdata: FrameOrSeries = splitter._get_sorted_data()
if sdata.ndim == 2 and np.any(sdata.dtypes.apply(is_extension_array_dtype)):
# calling splitter.fast_apply will raise TypeError via apply_frame_axis0
# if we pass EA instead of ndarray
# TODO: can we have a workaround for EAs backed by ndarray?
pass

elif (
com.get_callable_name(f) not in base.plotting_methods
and isinstance(splitter, FrameSplitter)
and axis == 0
# fast_apply/libreduction doesn't allow non-numpy backed indexes
and not sdata.index._has_complex_internals
):
try:
result_values, mutated = splitter.fast_apply(f, sdata, group_keys)

except libreduction.InvalidApply as err:
# This Exception is raised if `f` triggers an exception
# but it is preferable to raise the exception in Python.
if "Let this error raise above us" not in str(err):
# TODO: can we infer anything about whether this is
# worth-retrying in pure-python?
raise

else:
# If the fast apply path could be used we can return here.
# Otherwise we need to fall back to the slow implementation.
if len(result_values) == len(group_keys):
return group_keys, result_values, mutated

for key, (i, group) in zip(group_keys, splitter):
object.__setattr__(group, "name", key)

Expand Down Expand Up @@ -925,11 +894,6 @@ def _chop(self, sdata: Series, slice_obj: slice) -> Series:


class FrameSplitter(DataSplitter):
    def fast_apply(self, f, sdata: FrameOrSeries, names):
        """
        Apply ``f`` group-wise over the sorted data via the cython path.

        Delegates to ``libreduction.apply_frame_axis0``, which slides a
        buffer window over consecutive row slices of ``sdata`` and
        returns ``(values, mutated)``; it raises
        ``libreduction.InvalidApply`` when ``f`` itself raises.
        """
        # Compute the [start, end) row slice for each of the ngroups
        # groups from the sorted group labels.
        starts, ends = lib.generate_slices(self.slabels, self.ngroups)
        return libreduction.apply_frame_axis0(sdata, f, names, starts, ends)

def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame:
if self.axis == 0:
return sdata.iloc[slice_obj]
Expand Down
33 changes: 0 additions & 33 deletions pandas/tests/groupby/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,39 +82,6 @@ def test_apply_trivial_fail():
tm.assert_frame_equal(result, expected)


def test_fast_apply():
    # Verify that the cython fast-apply path runs without raising;
    # otherwise groupby would fall back to the (slow) pure-python path.
    n_rows = 1000
    df = DataFrame(
        {
            "key": np.random.randint(0, 2000, size=n_rows),
            "key2": np.random.randint(0, 3, size=n_rows),
            "value1": np.random.randn(n_rows),
            "value2": ["foo", "bar", "baz", "qux"] * (n_rows // 4),
        }
    )

    def constant_one(g):
        return 1

    grouped = df.groupby(["key", "key2"])

    grouper = grouped.grouper

    splitter = grouper._get_splitter(grouped._selected_obj, axis=grouped.axis)
    group_keys = grouper._get_group_keys()
    sorted_data = splitter._get_sorted_data()

    values, mutated = splitter.fast_apply(constant_one, sorted_data, group_keys)

    assert not mutated


@pytest.mark.parametrize(
"df, group_names",
[
Expand Down
6 changes: 5 additions & 1 deletion pandas/tests/groupby/test_whitelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,6 @@ def test_groupby_selection_with_methods(df):
"ffill",
"bfill",
"pct_change",
"tshift",
]

for m in methods:
Expand All @@ -377,6 +376,11 @@ def test_groupby_selection_with_methods(df):
# should always be frames!
tm.assert_frame_equal(res, exp)

with pytest.raises(ValueError, match="Freq was not given"):
g.tshift()
with pytest.raises(ValueError, match="Freq was not given"):
g_exp.tshift()

# methods which aren't just .foo()
tm.assert_frame_equal(g.fillna(0), g_exp.fillna(0))
tm.assert_frame_equal(g.dtypes, g_exp.dtypes)
Expand Down