diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index e6e658c0c6979..a19226670ec0a 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -323,6 +323,10 @@ cdef class SeriesGrouper(_BaseGrouper): # safer obj._get_values(slice(None, 0)) assert dummy is not None + if len(series) == 0: + # get_result would never assign `result` + raise ValueError("SeriesGrouper requires non-empty `series`") + self.labels = labels self.f = f @@ -408,8 +412,9 @@ cdef class SeriesGrouper(_BaseGrouper): islider.reset() vslider.reset() - if result is None: - raise ValueError("No result.") + # We check for empty series in the constructor, so should always + # have result initialized by this point. + assert result is not None, "`result` has not been assigned." if result.dtype == np.object_: result = maybe_convert_objects(result) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 6796239cf3fd9..ae397277de41c 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -604,7 +604,11 @@ def agg_series(self, obj: Series, func): # Caller is responsible for checking ngroups != 0 assert self.ngroups != 0 - if is_extension_array_dtype(obj.dtype) and obj.dtype.kind != "M": + if len(obj) == 0: + # SeriesGrouper would raise if we were to call _aggregate_series_fast + return self._aggregate_series_pure_python(obj, func) + + elif is_extension_array_dtype(obj.dtype) and obj.dtype.kind != "M": # _aggregate_series_fast would raise TypeError when # calling libreduction.Slider # TODO: can we get a performant workaround for EAs backed by ndarray? @@ -618,10 +622,7 @@ def agg_series(self, obj: Series, func): try: return self._aggregate_series_fast(obj, func) except ValueError as err: - if "No result." in str(err): - # raised in libreduction - pass - elif "Function does not reduce" in str(err): + if "Function does not reduce" in str(err): # raised in libreduction pass else: @@ -632,6 +633,7 @@ def _aggregate_series_fast(self, obj, func): # At this point we have already checked that # - obj.index is not a MultiIndex # - obj is backed by an ndarray, not ExtensionArray + # - len(obj) > 0 # - ngroups != 0 func = self._is_builtin_func(func) diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index 0e7a66769d2d4..4ede6b165c691 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -25,6 +25,16 @@ def test_series_grouper(): tm.assert_almost_equal(counts, exp_counts) +def test_series_grouper_requires_nonempty_raises(): + # GH#29500 + obj = Series(np.random.randn(10)) + dummy = obj[:0] + labels = np.array([-1, -1, -1, 0, 0, 0, 1, 1, 1, 1], dtype=np.int64) + + with pytest.raises(ValueError, match="SeriesGrouper requires non-empty `series`"): + libreduction.SeriesGrouper(dummy, np.mean, labels, 2, dummy) + + def test_series_bin_grouper(): obj = Series(np.random.randn(10)) dummy = obj[:0]