REF: make libreduction behavior match _aggregate_series_pure_python (pandas-dev#41242)

jbrockmendel · yeshsurya · commit fa1c6eba5c66 · 2021-05-06T14:24:58.000+05:30
diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
@@ -27,11 +27,11 @@ from pandas._libs.lib import (
 )
 
 
-cpdef check_result_array(object obj, Py_ssize_t cnt):
+cpdef check_result_array(object obj):
 
     if (is_array(obj) or
-            (isinstance(obj, list) and len(obj) == cnt) or
-            getattr(obj, 'shape', None) == (cnt,)):
+            (isinstance(obj, list) and len(obj) == 0) or
+            getattr(obj, 'shape', None) == (0,)):
         raise ValueError('Must produce aggregated value')
 
 
@@ -89,9 +89,7 @@ cdef class _BaseGrouper:
             # On the first pass, we check the output shape to see
             #  if this looks like a reduction.
             initialized = True
-            # In all tests other than test_series_grouper and
-            #  test_series_bin_grouper, we have len(self.dummy_arr) == 0
-            check_result_array(res, len(self.dummy_arr))
+            check_result_array(res)
 
         return res, initialized
 
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -510,6 +510,8 @@ def _get_index() -> Index:
             return self._reindex_output(result)
 
     def _aggregate_named(self, func, *args, **kwargs):
+        # Note: this is very similar to _aggregate_series_pure_python,
+        #  but that does not pin group.name
         result = {}
         initialized = False
 
@@ -522,7 +524,7 @@ def _aggregate_named(self, func, *args, **kwargs):
             output = libreduction.extract_result(output)
             if not initialized:
                 # We only do this validation on the first iteration
-                libreduction.check_result_array(output, 0)
+                libreduction.check_result_array(output)
                 initialized = True
             result[name] = output
 
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
@@ -985,15 +985,7 @@ def agg_series(self, obj: Series, func: F) -> tuple[ArrayLike, np.ndarray]:
             # Preempt TypeError in _aggregate_series_fast
             return self._aggregate_series_pure_python(obj, func)
 
-        try:
-            return self._aggregate_series_fast(obj, func)
-        except ValueError as err:
-            if "Must produce aggregated value" in str(err):
-                # raised in libreduction
-                pass
-            else:
-                raise
-        return self._aggregate_series_pure_python(obj, func)
+        return self._aggregate_series_fast(obj, func)
 
     def _aggregate_series_fast(
         self, obj: Series, func: F
@@ -1023,9 +1015,10 @@ def _aggregate_series_pure_python(self, obj: Series, func: F):
         result = np.empty(ngroups, dtype="O")
         initialized = False
 
+        # equiv: splitter = self._get_splitter(obj, axis=0)
         splitter = get_splitter(obj, group_index, ngroups, axis=0)
 
-        for label, group in enumerate(splitter):
+        for i, group in enumerate(splitter):
 
             # Each step of this loop corresponds to
             #  libreduction._BaseGrouper._apply_to_group
@@ -1034,11 +1027,11 @@ def _aggregate_series_pure_python(self, obj: Series, func: F):
 
             if not initialized:
                 # We only do this validation on the first iteration
-                libreduction.check_result_array(res, 0)
+                libreduction.check_result_array(res)
                 initialized = True
 
-            counts[label] = group.shape[0]
-            result[label] = res
+            counts[i] = group.shape[0]
+            result[i] = res
 
         npvalues = lib.maybe_convert_objects(result, try_float=False)
         out = maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True)