From d2d72569458043021ae719cd607c49fbd483ba17 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 8 Nov 2019 16:26:09 -0800 Subject: [PATCH 1/3] REF: Avoid result=None by checking ngroups up front --- pandas/core/groupby/groupby.py | 11 +++++++---- pandas/core/groupby/ops.py | 12 ++++++++++-- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index e73be29d5b104..cc2f5d25ba950 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -898,6 +898,11 @@ def _python_agg_general(self, func, *args, **kwargs): # iterate through "columns" ex exclusions to populate output dict output = {} for name, obj in self._iterate_slices(): + if self.grouper.ngroups == 0: + # agg_series below assumes ngroups > 0 + # 3 test cases get here + continue + try: # if this function is invalid for this dtype, we will ignore it. func(obj[:0]) @@ -911,10 +916,8 @@ def _python_agg_general(self, func, *args, **kwargs): pass result, counts = self.grouper.agg_series(obj, f) - if result is not None: - # TODO: only 3 test cases get None here, do something - # in those cases - output[name] = self._try_cast(result, obj, numeric_only=True) + assert result is not None + output[name] = self._try_cast(result, obj, numeric_only=True) if len(output) == 0: return self._python_apply_general(f) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index e438db6c620ec..93f197afa0c64 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -583,6 +583,9 @@ def _transform( return result def agg_series(self, obj: Series, func): + # Caller is responsible for checking ngroups != 0 + assert self.ngroups != 0 + if is_extension_array_dtype(obj.dtype) and obj.dtype.kind != "M": # _aggregate_series_fast would raise TypeError when # calling libreduction.Slider @@ -608,8 +611,10 @@ def agg_series(self, obj: Series, func): return self._aggregate_series_pure_python(obj, func) def _aggregate_series_fast(self, obj, func): - # At this point we have already checked that obj.index is not a MultiIndex - # and that obj is backed by an ndarray, not ExtensionArray + # At this point we have already checked that + # - obj.index is not a MultiIndex + # - obj is backed by an ndarray, not ExtensionArray + # - ngroups != 0 func = self._is_builtin_func(func) group_index, _, ngroups = self.group_info @@ -797,6 +802,9 @@ def groupings(self): ] def agg_series(self, obj: Series, func): + # Caller is responsible for checking ngroups != 0 + assert self.ngroups != 0 + if is_extension_array_dtype(obj.dtype): # pre-empty SeriesBinGrouper from raising TypeError # TODO: watch out, this can return None From e109f1c0a32105b7b76a162ff38b4b7a9d195e25 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 9 Nov 2019 17:33:33 -0800 Subject: [PATCH 2/3] remove comment --- pandas/core/groupby/groupby.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index cc2f5d25ba950..25ba35a1cc752 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -900,7 +900,6 @@ def _python_agg_general(self, func, *args, **kwargs): for name, obj in self._iterate_slices(): if self.grouper.ngroups == 0: # agg_series below assumes ngroups > 0 - # 3 test cases get here continue try: From 81630701afd1d5c5e6a83b58bf95f92c892d4f12 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 10 Nov 2019 08:25:42 -0800 Subject: [PATCH 3/3] missed check --- pandas/core/groupby/ops.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 93f197afa0c64..d6cdb7dfb8b72 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -647,11 +647,9 @@ def _aggregate_series_pure_python(self, obj, func): counts[label] = group.shape[0] result[label] = res - if result is not None: - # if splitter is empty, result can be None, in which case - # maybe_convert_objects would raise TypeError - result = lib.maybe_convert_objects(result, try_float=0) - # TODO: try_cast back to EA? + assert result is not None + result = lib.maybe_convert_objects(result, try_float=0) + # TODO: try_cast back to EA? return result, counts