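refactor core/groupby (#37583)

Readability-only cleanup, intended to preserve behavior: collapse nested
`if` statements into single conditions, merge duplicate `elif` branches in
`transform`, return expressions directly instead of via a temporary, replace
`len(output) == 0` with `not output`, build the `_transform_fast` output with
a list comprehension, and use an early return in `_make_codes`.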

MarcoGorelli · web-flow · commit b7929b86c766 · 2020-11-04T17:57:59.000-05:00
diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py
@@ -63,9 +63,8 @@ def _gotitem(self, key, ndim, subset=None):
 
         self = type(self)(subset, groupby=groupby, parent=self, **kwargs)
         self._reset_cache()
-        if subset.ndim == 2:
-            if is_scalar(key) and key in subset or is_list_like(key):
-                self._selection = key
+        if subset.ndim == 2 and (is_scalar(key) and key in subset or is_list_like(key)):
+            self._selection = key
         return self
 
 
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -512,12 +512,9 @@ def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):
         elif func not in base.transform_kernel_allowlist:
             msg = f"'{func}' is not a valid function name for transform(name)"
             raise ValueError(msg)
-        elif func in base.cythonized_kernels:
+        elif func in base.cythonized_kernels or func in base.transformation_kernels:
             # cythonized transform or canned "agg+broadcast"
             return getattr(self, func)(*args, **kwargs)
-        elif func in base.transformation_kernels:
-            return getattr(self, func)(*args, **kwargs)
-
         # If func is a reduction, we need to broadcast the
         # result to the whole group. Compute func result
         # and deal with possible broadcasting below.
@@ -1111,8 +1108,7 @@ def blk_func(bvalues: ArrayLike) -> ArrayLike:
                 # unwrap DataFrame to get array
                 result = result._mgr.blocks[0].values
 
-            res_values = cast_agg_result(result, bvalues, how)
-            return res_values
+            return cast_agg_result(result, bvalues, how)
 
         # TypeError -> we may have an exception in trying to aggregate
         #  continue and exclude the block
@@ -1368,12 +1364,9 @@ def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):
         elif func not in base.transform_kernel_allowlist:
             msg = f"'{func}' is not a valid function name for transform(name)"
             raise ValueError(msg)
-        elif func in base.cythonized_kernels:
+        elif func in base.cythonized_kernels or func in base.transformation_kernels:
             # cythonized transformation or canned "reduction+broadcast"
             return getattr(self, func)(*args, **kwargs)
-        elif func in base.transformation_kernels:
-            return getattr(self, func)(*args, **kwargs)
-
         # GH 30918
         # Use _transform_fast only when we know func is an aggregation
         if func in base.reduction_kernels:
@@ -1401,9 +1394,10 @@ def _transform_fast(self, result: DataFrame) -> DataFrame:
         # by take operation
         ids, _, ngroup = self.grouper.group_info
         result = result.reindex(self.grouper.result_index, copy=False)
-        output = []
-        for i, _ in enumerate(result.columns):
-            output.append(algorithms.take_1d(result.iloc[:, i].values, ids))
+        output = [
+            algorithms.take_1d(result.iloc[:, i].values, ids)
+            for i, _ in enumerate(result.columns)
+        ]
 
         return self.obj._constructor._from_arrays(
             output, columns=result.columns, index=obj.index
@@ -1462,7 +1456,7 @@ def _transform_item_by_item(self, obj: DataFrame, wrapper) -> DataFrame:
             else:
                 inds.append(i)
 
-        if len(output) == 0:
+        if not output:
             raise TypeError("Transform function invalid for data types")
 
         columns = obj.columns
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -1001,7 +1001,7 @@ def _cython_transform(self, how: str, numeric_only: bool = True, **kwargs):
             key = base.OutputKey(label=name, position=idx)
             output[key] = result
 
-        if len(output) == 0:
+        if not output:
             raise DataError("No numeric types to aggregate")
 
         return self._wrap_transformed_output(output)
@@ -1084,7 +1084,7 @@ def _cython_agg_general(
                 output[key] = maybe_cast_result(result, obj, how=how)
                 idx += 1
 
-        if len(output) == 0:
+        if not output:
             raise DataError("No numeric types to aggregate")
 
         return self._wrap_aggregated_output(output, index=self.grouper.result_index)
@@ -1182,7 +1182,7 @@ def _python_agg_general(self, func, *args, **kwargs):
             key = base.OutputKey(label=name, position=idx)
             output[key] = maybe_cast_result(result, obj, numeric_only=True)
 
-        if len(output) == 0:
+        if not output:
             return self._python_apply_general(f, self._selected_obj)
 
         if self.grouper._filter_empty_groups:
@@ -2550,9 +2550,8 @@ def _get_cythonized_result(
         """
         if result_is_index and aggregate:
             raise ValueError("'result_is_index' and 'aggregate' cannot both be True!")
-        if post_processing:
-            if not callable(post_processing):
-                raise ValueError("'post_processing' must be a callable!")
+        if post_processing and not callable(post_processing):
+            raise ValueError("'post_processing' must be a callable!")
         if pre_processing:
             if not callable(pre_processing):
                 raise ValueError("'pre_processing' must be a callable!")
@@ -2631,7 +2630,7 @@ def _get_cythonized_result(
             output[key] = result
 
         # error_msg is "" on an frame/series with no rows or columns
-        if len(output) == 0 and error_msg != "":
+        if not output and error_msg != "":
             raise TypeError(error_msg)
 
         if aggregate:
diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
@@ -593,23 +593,25 @@ def group_index(self) -> Index:
         return self._group_index
 
     def _make_codes(self) -> None:
-        if self._codes is None or self._group_index is None:
-            # we have a list of groupers
-            if isinstance(self.grouper, ops.BaseGrouper):
-                codes = self.grouper.codes_info
-                uniques = self.grouper.result_index
+        if self._codes is not None and self._group_index is not None:
+            return
+
+        # we have a list of groupers
+        if isinstance(self.grouper, ops.BaseGrouper):
+            codes = self.grouper.codes_info
+            uniques = self.grouper.result_index
+        else:
+            # GH35667, replace dropna=False with na_sentinel=None
+            if not self.dropna:
+                na_sentinel = None
             else:
-                # GH35667, replace dropna=False with na_sentinel=None
-                if not self.dropna:
-                    na_sentinel = None
-                else:
-                    na_sentinel = -1
-                codes, uniques = algorithms.factorize(
-                    self.grouper, sort=self.sort, na_sentinel=na_sentinel
-                )
-                uniques = Index(uniques, name=self.name)
-            self._codes = codes
-            self._group_index = uniques
+                na_sentinel = -1
+            codes, uniques = algorithms.factorize(
+                self.grouper, sort=self.sort, na_sentinel=na_sentinel
+            )
+            uniques = Index(uniques, name=self.name)
+        self._codes = codes
+        self._group_index = uniques
 
     @cache_readonly
     def groups(self) -> Dict[Hashable, np.ndarray]:
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
@@ -322,10 +322,9 @@ def result_index(self) -> Index:
 
         codes = self.reconstructed_codes
         levels = [ping.result_index for ping in self.groupings]
-        result = MultiIndex(
+        return MultiIndex(
             levels=levels, codes=codes, verify_integrity=False, names=self.names
         )
-        return result
 
     def get_group_levels(self) -> List[Index]:
         if not self.compressed and len(self.groupings) == 1: