From cf68ab00ada85744e40a8b06b171b5b91d5e9853 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 3 Dec 2020 20:31:51 -0800 Subject: [PATCH 1/2] REF: avoid catching all exceptions in libreduction --- pandas/_libs/reduction.pyx | 31 +++++++++++++++++++++++++------ pandas/core/groupby/ops.py | 11 ++++------- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 4b6b71088cb7c..960f85e224ad8 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -365,11 +365,7 @@ def apply_frame_axis0(object frame, object f, object names, chunk = slider.dummy object.__setattr__(chunk, 'name', names[i]) - try: - piece = f(chunk) - except Exception as err: - # We can't be more specific without knowing something about `f` - raise InvalidApply("Let this error raise above us") from err + piece = f(chunk) # Need to infer if low level index slider will cause segfaults require_slow_apply = i == 0 and piece is chunk @@ -406,7 +402,8 @@ cdef class BlockSlider: """ cdef: object frame, dummy, index, block - list blk_values + list blocks, blk_values + ndarray orig_blklocs, orig_blknos ndarray values Slider idx_slider char **base_ptrs @@ -418,6 +415,13 @@ cdef class BlockSlider: self.dummy = frame[:0] self.index = self.dummy.index + # GH#35417 attributes we need to restore at each step in case + # the function modified them. + mgr = self.dummy._mgr + self.orig_blklocs = mgr.blklocs + self.orig_blknos = mgr.blknos + self.blocks = [x for x in self.dummy._mgr.blocks] + self.blk_values = [block.values for block in self.dummy._mgr.blocks] for values in self.blk_values: @@ -441,6 +445,9 @@ cdef class BlockSlider: cdef: ndarray arr Py_ssize_t i + + self._restore_blocks() + # move blocks for i in range(self.nblocks): arr = self.blk_values[i] @@ -460,9 +467,21 @@ cdef class BlockSlider: cdef: ndarray arr Py_ssize_t i + + self._restore_blocks() + for i in range(self.nblocks): arr = self.blk_values[i] # axis=1 is the frame's axis=0 arr.data = self.base_ptrs[i] arr.shape[1] = 0 + + cdef _restore_blocks(self): + """ + Ensure that we have the original blocks, blknos, and blklocs. + """ + mgr = self.dummy._mgr + mgr.blocks = self.blocks + mgr._blklocs = self.orig_blklocs + mgr._blknos = self.orig_blknos diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index d98c55755042e..65e79faec65a6 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -198,13 +198,10 @@ def apply(self, f: F, data: FrameOrSeries, axis: int = 0): try: result_values, mutated = splitter.fast_apply(f, sdata, group_keys) - except libreduction.InvalidApply as err: - # This Exception is raised if `f` triggers an exception - # but it is preferable to raise the exception in Python. - if "Let this error raise above us" not in str(err): - # TODO: can we infer anything about whether this is - # worth-retrying in pure-python? - raise + except IndexError: + # test_apply_mutate this is a rare case in which re-running + # in python-space may make a difference + pass else: # If the fast apply path could be used we can return here. From abe87f8e60db731dc22b567c236885fe17059ac6 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 12 Dec 2020 16:18:22 -0800 Subject: [PATCH 2/2] flesh out comment, remove InvalidApply --- pandas/_libs/reduction.pyx | 4 ---- pandas/core/groupby/ops.py | 4 ++-- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 960f85e224ad8..25b41b020aee6 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -335,10 +335,6 @@ cdef class Slider: self.buf.shape[0] = 0 -class InvalidApply(Exception): - pass - - def apply_frame_axis0(object frame, object f, object names, const int64_t[:] starts, const int64_t[:] ends): cdef: diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index f1c40290a8fff..d1a4fc6fc74e5 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -203,8 +203,8 @@ def apply(self, f: F, data: FrameOrSeries, axis: int = 0): result_values, mutated = splitter.fast_apply(f, sdata, group_keys) except IndexError: - # test_apply_mutate this is a rare case in which re-running - # in python-space may make a difference + # This is a rare case in which re-running in python-space may + # make a difference, see test_apply_mutate.test_mutate_groups pass else: