From 2d9e7d93c3721473a2f6a8b7b442cd33fab648d5 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 14 Nov 2019 17:28:32 -0800 Subject: [PATCH 1/2] REF: de-duplicate _apply_to_group --- pandas/_libs/reduction.pyx | 50 ++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 8733249888ae9..99454e155a40b 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -189,6 +189,24 @@ cdef class _BaseGrouper: return cached_typ, cached_ityp + cdef inline object _apply_to_group(self, + object cached_typ, object cached_ityp, + Slider islider, Slider vslider, + Py_ssize_t group_size, bint* initialized): + cached_ityp._engine.clear_mapping() + res = self.f(cached_typ) + res = _extract_result(res) + if not initialized[0]: + # On the first pass, we check the output shape to see + # if this looks like a reduction. + initialized[0] = 1 + _check_result_array(res, len(self.dummy_arr)) + + islider.advance(group_size) + vslider.advance(group_size) + + return res + cdef class SeriesBinGrouper(_BaseGrouper): """ @@ -217,7 +235,7 @@ cdef class SeriesBinGrouper(_BaseGrouper): self.typ = series._constructor self.ityp = series.index._constructor self.index = series.index.values - self.name = getattr(series, 'name', None) + self.name = series.name self.dummy_arr, self.dummy_index = self._check_dummy(dummy) @@ -265,20 +283,12 @@ cdef class SeriesBinGrouper(_BaseGrouper): cached_typ, cached_ityp = self._update_cached_objs( cached_typ, cached_ityp, islider, vslider) - cached_ityp._engine.clear_mapping() - res = self.f(cached_typ) - res = _extract_result(res) - if not initialized: - # On the first pass, we check the output shape to see - # if this looks like a reduction. - initialized = 1 - _check_result_array(res, len(self.dummy_arr)) + res = self._apply_to_group(cached_typ, cached_ityp, + islider, vslider, + group_size, &initialized) result[i] = res - islider.advance(group_size) - vslider.advance(group_size) - finally: # so we don't free the wrong memory islider.reset() @@ -322,7 +332,7 @@ cdef class SeriesGrouper(_BaseGrouper): self.typ = series._constructor self.ityp = series.index._constructor self.index = series.index.values - self.name = getattr(series, 'name', None) + self.name = series.name self.dummy_arr, self.dummy_index = self._check_dummy(dummy) self.ngroups = ngroups @@ -367,20 +377,12 @@ cdef class SeriesGrouper(_BaseGrouper): cached_typ, cached_ityp = self._update_cached_objs( cached_typ, cached_ityp, islider, vslider) - cached_ityp._engine.clear_mapping() - res = self.f(cached_typ) - res = _extract_result(res) - if not initialized: - # On the first pass, we check the output shape to see - # if this looks like a reduction. - initialized = 1 - _check_result_array(res, len(self.dummy_arr)) + res = self._apply_to_group(cached_typ, cached_ityp, + islider, vslider, + group_size, &initialized) result[lab] = res counts[lab] = group_size - islider.advance(group_size) - vslider.advance(group_size) - group_size = 0 finally: From 2439078e04b4dba909fc54fbcebbc482051b00fa Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 17 Nov 2019 08:00:58 -0800 Subject: [PATCH 2/2] requested edits --- pandas/_libs/reduction.pyx | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 99454e155a40b..f5521b94b6c33 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -192,20 +192,23 @@ cdef class _BaseGrouper: cdef inline object _apply_to_group(self, object cached_typ, object cached_ityp, Slider islider, Slider vslider, - Py_ssize_t group_size, bint* initialized): + Py_ssize_t group_size, bint initialized): + """ + Call self.f on our new group, then update to the next group. + """ cached_ityp._engine.clear_mapping() res = self.f(cached_typ) res = _extract_result(res) - if not initialized[0]: + if not initialized: # On the first pass, we check the output shape to see # if this looks like a reduction. - initialized[0] = 1 + initialized = 1 _check_result_array(res, len(self.dummy_arr)) islider.advance(group_size) vslider.advance(group_size) - return res + return res, initialized cdef class SeriesBinGrouper(_BaseGrouper): @@ -283,9 +286,9 @@ cdef class SeriesBinGrouper(_BaseGrouper): cached_typ, cached_ityp = self._update_cached_objs( cached_typ, cached_ityp, islider, vslider) - res = self._apply_to_group(cached_typ, cached_ityp, - islider, vslider, - group_size, &initialized) + res, initialized = self._apply_to_group(cached_typ, cached_ityp, + islider, vslider, + group_size, initialized) result[i] = res @@ -377,9 +380,9 @@ cdef class SeriesGrouper(_BaseGrouper): cached_typ, cached_ityp = self._update_cached_objs( cached_typ, cached_ityp, islider, vslider) - res = self._apply_to_group(cached_typ, cached_ityp, - islider, vslider, - group_size, &initialized) + res, initialized = self._apply_to_group(cached_typ, cached_ityp, + islider, vslider, + group_size, initialized) result[lab] = res counts[lab] = group_size