From 6d9d2253a4244413f9c70622a96461ce2c8d5e15 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Mon, 11 Nov 2019 10:05:53 -0800
Subject: [PATCH 1/2] REF: pre-allocate result

---
 pandas/_libs/reduction.pyx | 32 +++++++++++---------------------
 1 file changed, 11 insertions(+), 21 deletions(-)
diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
index e6e658c0c6979..5a124c785138f 100644
--- a/pandas/_libs/reduction.pyx
+++ b/pandas/_libs/reduction.pyx
@@ -113,8 +113,9 @@ cdef class Reducer:
         chunk.data = arr.data
         labels = self.labels
         has_labels = labels is not None
-        has_index = self.index is not None
-        incr = self.increment
+
+        result = _get_result_array(None, self.nresults, len(self.dummy))
+        it = <flatiter>PyArray_IterNew(result)
 
         try:
             for i in range(self.nresults):
@@ -131,26 +132,18 @@ cdef class Reducer:
                     if self.typ is not None:
 
                         # recreate with the index if supplied
-                        if has_index:
-
-                            cached_typ = self.typ(
-                                chunk, index=self.index, name=name)
-
-                        else:
-
-                            # use the passsed typ, sans index
-                            cached_typ = self.typ(chunk, name=name)
+                        cached_typ = self.typ(
+                            chunk, index=self.index, name=name)
 
                 # use the cached_typ if possible
                 if cached_typ is not None:
 
-                    if has_index:
-                        object.__setattr__(cached_typ, 'index', self.index)
-
+                    object.__setattr__(cached_typ, 'index', self.index)
                     object.__setattr__(
                         cached_typ._data._block, 'values', chunk)
                     object.__setattr__(cached_typ, 'name', name)
                     res = self.f(cached_typ)
+
                 else:
                     res = self.f(chunk)
 
@@ -158,10 +151,9 @@ cdef class Reducer:
                         and util.is_array(res.values)):
                     res = res.values
                 if i == 0:
-                    result = _get_result_array(res,
-                                               self.nresults,
-                                               len(self.dummy))
-                    it = <flatiter>PyArray_IterNew(result)
+                    # On the first pass, we check the output shape to see
+                    #  if this looks like a reduction.
+                    _get_result_array(res, 0, len(self.dummy))
 
                 PyArray_SETITEM(result, PyArray_ITER_DATA(it), res)
                 chunk.data = chunk.data + self.increment
@@ -170,9 +162,7 @@ cdef class Reducer:
             # so we don't free the wrong memory
             chunk.data = dummy_buf
 
-        if result.dtype == np.object_:
-            result = maybe_convert_objects(result)
-
+        result = maybe_convert_objects(result)
         return result
 
 

From aaa906f0107b2473636a83154bc47a18f0407583 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Mon, 11 Nov 2019 11:38:05 -0800
Subject: [PATCH 2/2] REF: pre-allocate results

---
 pandas/_libs/reduction.pyx | 52 +++++++++++++++++++++++---------------
 1 file changed, 31 insertions(+), 21 deletions(-)

diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
index 5a124c785138f..79198fa1630bb 100644
--- a/pandas/_libs/reduction.pyx
+++ b/pandas/_libs/reduction.pyx
@@ -18,15 +18,13 @@ cimport pandas._libs.util as util
 from pandas._libs.lib import maybe_convert_objects
 
 
-cdef _get_result_array(object obj, Py_ssize_t size, Py_ssize_t cnt):
+cdef _check_result_array(object obj, Py_ssize_t cnt):
 
     if (util.is_array(obj) or
             (isinstance(obj, list) and len(obj) == cnt) or
             getattr(obj, 'shape', None) == (cnt,)):
         raise ValueError('Function does not reduce')
 
-    return np.empty(size, dtype='O')
-
 
 cdef bint _is_sparse_array(object obj):
     # TODO can be removed one SparseArray.values is removed (GH26421)
@@ -113,8 +111,10 @@ cdef class Reducer:
         chunk.data = arr.data
         labels = self.labels
         has_labels = labels is not None
+        has_index = self.index is not None
+        incr = self.increment
 
-        result = _get_result_array(None, self.nresults, len(self.dummy))
+        result = np.empty(self.nresults, dtype='O')
         it = <flatiter>PyArray_IterNew(result)
 
         try:
@@ -132,18 +132,26 @@ cdef class Reducer:
                     if self.typ is not None:
 
                         # recreate with the index if supplied
-                        cached_typ = self.typ(
-                            chunk, index=self.index, name=name)
+                        if has_index:
+
+                            cached_typ = self.typ(
+                                chunk, index=self.index, name=name)
+
+                        else:
+
+                            # use the passed typ, sans index
+                            cached_typ = self.typ(chunk, name=name)
 
                 # use the cached_typ if possible
                 if cached_typ is not None:
 
-                    object.__setattr__(cached_typ, 'index', self.index)
+                    if has_index:
+                        object.__setattr__(cached_typ, 'index', self.index)
+
                     object.__setattr__(
                         cached_typ._data._block, 'values', chunk)
                     object.__setattr__(cached_typ, 'name', name)
                     res = self.f(cached_typ)
-
                 else:
                     res = self.f(chunk)
 
@@ -153,7 +161,7 @@ cdef class Reducer:
                 if i == 0:
                     # On the first pass, we check the output shape to see
                     #  if this looks like a reduction.
-                    _get_result_array(res, 0, len(self.dummy))
+                    _check_result_array(res, len(self.dummy))
 
                 PyArray_SETITEM(result, PyArray_ITER_DATA(it), res)
                 chunk.data = chunk.data + self.increment
@@ -248,6 +256,8 @@ cdef class SeriesBinGrouper(_BaseGrouper):
         vslider = Slider(self.arr, self.dummy_arr)
         islider = Slider(self.index, self.dummy_index)
 
+        result = np.empty(self.ngroups, dtype='O')
+
         try:
             for i in range(self.ngroups):
                 group_size = counts[i]
@@ -274,10 +284,11 @@ cdef class SeriesBinGrouper(_BaseGrouper):
                 res = self.f(cached_typ)
                 res = _extract_result(res)
                 if not initialized:
+                    # On the first pass, we check the output shape to see
+                    #  if this looks like a reduction.
                     initialized = 1
-                    result = _get_result_array(res,
-                                               self.ngroups,
-                                               len(self.dummy_arr))
+                    _check_result_array(res, len(self.dummy_arr))
+
                 result[i] = res
 
                 islider.advance(group_size)
@@ -288,9 +299,7 @@ cdef class SeriesBinGrouper(_BaseGrouper):
             islider.reset()
             vslider.reset()
 
-        if result.dtype == np.object_:
-            result = maybe_convert_objects(result)
-
+        result = maybe_convert_objects(result)
         return result, counts
 
 
@@ -349,6 +358,8 @@ cdef class SeriesGrouper(_BaseGrouper):
         vslider = Slider(self.arr, self.dummy_arr)
         islider = Slider(self.index, self.dummy_index)
 
+        result = np.empty(self.ngroups, dtype='O')
+
         try:
             for i in range(n):
                 group_size += 1
@@ -381,10 +392,10 @@ cdef class SeriesGrouper(_BaseGrouper):
                     res = self.f(cached_typ)
                     res = _extract_result(res)
                     if not initialized:
+                        # On the first pass, we check the output shape to see
+                        #  if this looks like a reduction.
                         initialized = 1
-                        result = _get_result_array(res,
-                                                   self.ngroups,
-                                                   len(self.dummy_arr))
+                        _check_result_array(res, len(self.dummy_arr))
 
                     result[lab] = res
                     counts[lab] = group_size
@@ -398,11 +409,10 @@ cdef class SeriesGrouper(_BaseGrouper):
             islider.reset()
             vslider.reset()
 
-        if result is None:
+        if not initialized:
             raise ValueError("No result.")
 
-        if result.dtype == np.object_:
-            result = maybe_convert_objects(result)
+        result = maybe_convert_objects(result)
 
         return result, counts