diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py index 2387427d15670..8e278dc81a8cc 100644 --- a/pandas/core/groupby/base.py +++ b/pandas/core/groupby/base.py @@ -63,9 +63,8 @@ def _gotitem(self, key, ndim, subset=None): self = type(self)(subset, groupby=groupby, parent=self, **kwargs) self._reset_cache() - if subset.ndim == 2: - if is_scalar(key) and key in subset or is_list_like(key): - self._selection = key + if subset.ndim == 2 and (is_scalar(key) and key in subset or is_list_like(key)): + self._selection = key return self diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 457aed3a72799..d0b58e8abc4ee 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -512,12 +512,9 @@ def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): elif func not in base.transform_kernel_allowlist: msg = f"'{func}' is not a valid function name for transform(name)" raise ValueError(msg) - elif func in base.cythonized_kernels: + elif func in base.cythonized_kernels or func in base.transformation_kernels: # cythonized transform or canned "agg+broadcast" return getattr(self, func)(*args, **kwargs) - elif func in base.transformation_kernels: - return getattr(self, func)(*args, **kwargs) - # If func is a reduction, we need to broadcast the # result to the whole group. Compute func result # and deal with possible broadcasting below. @@ -1111,8 +1108,7 @@ def blk_func(bvalues: ArrayLike) -> ArrayLike: # unwrap DataFrame to get array result = result._mgr.blocks[0].values - res_values = cast_agg_result(result, bvalues, how) - return res_values + return cast_agg_result(result, bvalues, how) # TypeError -> we may have an exception in trying to aggregate # continue and exclude the block @@ -1368,12 +1364,9 @@ def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): elif func not in base.transform_kernel_allowlist: msg = f"'{func}' is not a valid function name for transform(name)" raise ValueError(msg) - elif func in base.cythonized_kernels: + elif func in base.cythonized_kernels or func in base.transformation_kernels: # cythonized transformation or canned "reduction+broadcast" return getattr(self, func)(*args, **kwargs) - elif func in base.transformation_kernels: - return getattr(self, func)(*args, **kwargs) - # GH 30918 # Use _transform_fast only when we know func is an aggregation if func in base.reduction_kernels: @@ -1401,9 +1394,10 @@ def _transform_fast(self, result: DataFrame) -> DataFrame: # by take operation ids, _, ngroup = self.grouper.group_info result = result.reindex(self.grouper.result_index, copy=False) - output = [] - for i, _ in enumerate(result.columns): - output.append(algorithms.take_1d(result.iloc[:, i].values, ids)) + output = [ + algorithms.take_1d(result.iloc[:, i].values, ids) + for i, _ in enumerate(result.columns) + ] return self.obj._constructor._from_arrays( output, columns=result.columns, index=obj.index @@ -1462,7 +1456,7 @@ def _transform_item_by_item(self, obj: DataFrame, wrapper) -> DataFrame: else: inds.append(i) - if len(output) == 0: + if not output: raise TypeError("Transform function invalid for data types") columns = obj.columns diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index c5bc9b563ea5e..32023576b0a91 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1001,7 +1001,7 @@ def _cython_transform(self, how: str, numeric_only: bool = True, **kwargs): key = base.OutputKey(label=name, position=idx) output[key] = result - if len(output) == 0: + if not output: raise DataError("No numeric types to aggregate") return self._wrap_transformed_output(output) @@ -1084,7 +1084,7 @@ def _cython_agg_general( output[key] = maybe_cast_result(result, obj, how=how) idx += 1 - if len(output) == 0: + if not output: raise DataError("No numeric types to aggregate") return self._wrap_aggregated_output(output, index=self.grouper.result_index) @@ -1182,7 +1182,7 @@ def _python_agg_general(self, func, *args, **kwargs): key = base.OutputKey(label=name, position=idx) output[key] = maybe_cast_result(result, obj, numeric_only=True) - if len(output) == 0: + if not output: return self._python_apply_general(f, self._selected_obj) if self.grouper._filter_empty_groups: @@ -2550,9 +2550,8 @@ def _get_cythonized_result( """ if result_is_index and aggregate: raise ValueError("'result_is_index' and 'aggregate' cannot both be True!") - if post_processing: - if not callable(post_processing): - raise ValueError("'post_processing' must be a callable!") + if post_processing and not callable(post_processing): + raise ValueError("'post_processing' must be a callable!") if pre_processing: if not callable(pre_processing): raise ValueError("'pre_processing' must be a callable!") @@ -2631,7 +2630,7 @@ def _get_cythonized_result( output[key] = result # error_msg is "" on an frame/series with no rows or columns - if len(output) == 0 and error_msg != "": + if not output and error_msg != "": raise TypeError(error_msg) if aggregate: diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 9f0d953a2cc71..ff5379567f090 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -593,23 +593,25 @@ def group_index(self) -> Index: return self._group_index def _make_codes(self) -> None: - if self._codes is None or self._group_index is None: - # we have a list of groupers - if isinstance(self.grouper, ops.BaseGrouper): - codes = self.grouper.codes_info - uniques = self.grouper.result_index + if self._codes is not None and self._group_index is not None: + return + + # we have a list of groupers + if isinstance(self.grouper, ops.BaseGrouper): + codes = self.grouper.codes_info + uniques = self.grouper.result_index + else: + # GH35667, replace dropna=False with na_sentinel=None + if not self.dropna: + na_sentinel = None else: - # GH35667, replace dropna=False with na_sentinel=None - if not self.dropna: - na_sentinel = None - else: - na_sentinel = -1 - codes, uniques = algorithms.factorize( - self.grouper, sort=self.sort, na_sentinel=na_sentinel - ) - uniques = Index(uniques, name=self.name) - self._codes = codes - self._group_index = uniques + na_sentinel = -1 + codes, uniques = algorithms.factorize( + self.grouper, sort=self.sort, na_sentinel=na_sentinel + ) + uniques = Index(uniques, name=self.name) + self._codes = codes + self._group_index = uniques @cache_readonly def groups(self) -> Dict[Hashable, np.ndarray]: diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 15725230d850a..438030008bb4d 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -322,10 +322,9 @@ def result_index(self) -> Index: codes = self.reconstructed_codes levels = [ping.result_index for ping in self.groupings] - result = MultiIndex( + return MultiIndex( levels=levels, codes=codes, verify_integrity=False, names=self.names ) - return result def get_group_levels(self) -> List[Index]: if not self.compressed and len(self.groupings) == 1: