From cf68ab00ada85744e40a8b06b171b5b91d5e9853 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 3 Dec 2020 20:31:51 -0800
Subject: [PATCH 1/2] REF: avoid catching all exceptions in libreduction

---
 pandas/_libs/reduction.pyx | 31 +++++++++++++++++++++++++------
 pandas/core/groupby/ops.py | 11 ++++-------
 2 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
index 4b6b71088cb7c..960f85e224ad8 100644
--- a/pandas/_libs/reduction.pyx
+++ b/pandas/_libs/reduction.pyx
@@ -365,11 +365,7 @@ def apply_frame_axis0(object frame, object f, object names,
             chunk = slider.dummy
             object.__setattr__(chunk, 'name', names[i])
 
-            try:
-                piece = f(chunk)
-            except Exception as err:
-                # We can't be more specific without knowing something about `f`
-                raise InvalidApply("Let this error raise above us") from err
+            piece = f(chunk)
 
             # Need to infer if low level index slider will cause segfaults
             require_slow_apply = i == 0 and piece is chunk
@@ -406,7 +402,8 @@ cdef class BlockSlider:
     """
     cdef:
         object frame, dummy, index, block
-        list blk_values
+        list blocks, blk_values
+        ndarray orig_blklocs, orig_blknos
         ndarray values
         Slider idx_slider
         char **base_ptrs
@@ -418,6 +415,13 @@ cdef class BlockSlider:
         self.dummy = frame[:0]
         self.index = self.dummy.index
 
+        # GH#35417 attributes we need to restore at each step in case
+        #  the function modified them.
+        mgr = self.dummy._mgr
+        self.orig_blklocs = mgr.blklocs
+        self.orig_blknos = mgr.blknos
+        self.blocks = [x for x in self.dummy._mgr.blocks]
+
         self.blk_values = [block.values for block in self.dummy._mgr.blocks]
 
         for values in self.blk_values:
@@ -441,6 +445,9 @@ cdef class BlockSlider:
         cdef:
             ndarray arr
             Py_ssize_t i
+
+        self._restore_blocks()
+
         # move blocks
         for i in range(self.nblocks):
             arr = self.blk_values[i]
@@ -460,9 +467,21 @@ cdef class BlockSlider:
         cdef:
             ndarray arr
             Py_ssize_t i
+
+        self._restore_blocks()
+
         for i in range(self.nblocks):
             arr = self.blk_values[i]
 
             # axis=1 is the frame's axis=0
             arr.data = self.base_ptrs[i]
             arr.shape[1] = 0
+
+    cdef _restore_blocks(self):
+        """
+        Ensure that we have the original blocks, blknos, and blklocs.
+        """
+        mgr = self.dummy._mgr
+        mgr.blocks = self.blocks
+        mgr._blklocs = self.orig_blklocs
+        mgr._blknos = self.orig_blknos
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index d98c55755042e..65e79faec65a6 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -198,13 +198,10 @@ def apply(self, f: F, data: FrameOrSeries, axis: int = 0):
             try:
                 result_values, mutated = splitter.fast_apply(f, sdata, group_keys)
 
-            except libreduction.InvalidApply as err:
-                # This Exception is raised if `f` triggers an exception
-                # but it is preferable to raise the exception in Python.
-                if "Let this error raise above us" not in str(err):
-                    # TODO: can we infer anything about whether this is
-                    #  worth-retrying in pure-python?
-                    raise
+            except IndexError:
+                # test_apply_mutate this is a rare case in which re-running
+                #  in python-space may make a difference
+                pass
 
             else:
                 # If the fast apply path could be used we can return here.

From abe87f8e60db731dc22b567c236885fe17059ac6 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sat, 12 Dec 2020 16:18:22 -0800
Subject: [PATCH 2/2] flesh out comment, remove InvalidApply

---
 pandas/_libs/reduction.pyx | 4 ----
 pandas/core/groupby/ops.py | 4 ++--
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
index 960f85e224ad8..25b41b020aee6 100644
--- a/pandas/_libs/reduction.pyx
+++ b/pandas/_libs/reduction.pyx
@@ -335,10 +335,6 @@ cdef class Slider:
         self.buf.shape[0] = 0
 
 
-class InvalidApply(Exception):
-    pass
-
-
 def apply_frame_axis0(object frame, object f, object names,
                       const int64_t[:] starts, const int64_t[:] ends):
     cdef:
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index f1c40290a8fff..d1a4fc6fc74e5 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -203,8 +203,8 @@ def apply(self, f: F, data: FrameOrSeries, axis: int = 0):
                 result_values, mutated = splitter.fast_apply(f, sdata, group_keys)
 
             except IndexError:
-                # test_apply_mutate this is a rare case in which re-running
-                #  in python-space may make a difference
+                # This is a rare case in which re-running in python-space may
+                #  make a difference, see test_apply_mutate.test_mutate_groups
                 pass
 
             else: