From 2120417da1d41d86387030aef265211614e2e790 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 9 May 2021 09:30:51 -0700 Subject: [PATCH 1/2] REF: groupby remove _selection_name --- pandas/core/apply.py | 1 + pandas/core/base.py | 10 ------ pandas/core/groupby/generic.py | 61 ++++++++++++++++------------------ 3 files changed, 30 insertions(+), 42 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index ad25eb6fbcaa8..2b7e1608d1f9c 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -324,6 +324,7 @@ def agg_list_like(self) -> FrameOrSeriesUnion: # i.e. obj is Series or DataFrame selected_obj = obj elif obj._selected_obj.ndim == 1: + # For SeriesGroupBy this matches _obj_with_exclusions selected_obj = obj._selected_obj else: selected_obj = obj._obj_with_exclusions diff --git a/pandas/core/base.py b/pandas/core/base.py index 3270e3dd82f7d..eedd0611dd34e 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -173,16 +173,6 @@ class SelectionMixin: _internal_names = ["_cache", "__setstate__"] _internal_names_set = set(_internal_names) - @property - def _selection_name(self): - """ - Return a name for myself; - - This would ideally be called the 'name' property, - but we cannot conflict with the Series.name property which can be set. - """ - return self._selection - @final @property def _selection_list(self): diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 9287163053cac..0e56ef0fa8ec1 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -11,7 +11,6 @@ abc, namedtuple, ) -import copy from functools import partial from textwrap import dedent from typing import ( @@ -172,16 +171,8 @@ def _iterate_slices(self) -> Iterable[Series]: yield self._selected_obj @property - def _selection_name(self): - """ - since we are a series, we by definition only have - a single name, but may be the result of a selection or - the name of our object - """ - if self._selection is None: - return self.obj.name - else: - return self._selection + def _selection_name(self) -> Hashable: + return self.obj.name _agg_examples_doc = dedent( """ @@ -316,15 +307,9 @@ def _aggregate_multiple_funcs(self, arg) -> DataFrame: results: dict[base.OutputKey, FrameOrSeriesUnion] = {} for idx, (name, func) in enumerate(arg): - obj = self - # reset the cache so that we - # only include the named selection - if name in self._selected_obj: - obj = copy.copy(obj) - obj._reset_cache() - obj._selection = name - results[base.OutputKey(label=name, position=idx)] = obj.aggregate(func) + key = base.OutputKey(label=name, position=idx) + results[key] = self.aggregate(func) if any(isinstance(x, DataFrame) for x in results.values()): from pandas import concat @@ -466,7 +451,7 @@ def _wrap_applied_output( # GH #6265 return self.obj._constructor( [], - name=self._selection_name, + name=self.obj.name, index=self.grouper.result_index, dtype=data.dtype, ) @@ -488,14 +473,14 @@ def _get_index() -> Index: # if self.observed is False, # keep all-NaN rows created while re-indexing result = result.stack(dropna=self.observed) - result.name = self._selection_name + result.name = self.obj.name return result elif isinstance(values[0], (Series, DataFrame)): return self._concat_objects(keys, values, not_indexed_same=not_indexed_same) else: # GH #6265 #24880 result = self.obj._constructor( - data=values, index=_get_index(), name=self._selection_name + data=values, index=_get_index(), name=self.obj.name ) return self._reindex_output(result) @@ -533,7 +518,7 @@ def _transform_general(self, func: Callable, *args, **kwargs) -> Series: Transform with a callable func`. """ assert callable(func) - klass = type(self._selected_obj) + klass = type(self.obj) results = [] for name, group in self: @@ -555,8 +540,10 @@ def _transform_general(self, func: Callable, *args, **kwargs) -> Series: else: result = self.obj._constructor(dtype=np.float64) - result.name = self._selected_obj.name - return result + result.name = self.obj.name + # error: Incompatible return value type (got "Union[DataFrame, Series]", + # expected "Series") + return result # type: ignore[return-value] def _can_use_transform_fast(self, result) -> bool: return True @@ -676,7 +663,7 @@ def nunique(self, dropna: bool = True) -> Series: res, out = np.zeros(len(ri), dtype=out.dtype), res res[ids[idx]] = out - result = self.obj._constructor(res, index=ri, name=self._selection_name) + result = self.obj._constructor(res, index=ri, name=self.obj.name) return self._reindex_output(result, fill_value=0) @doc(Series.describe) @@ -782,7 +769,7 @@ def apply_series_value_counts(): levels = [ping.group_index for ping in self.grouper.groupings] + [ lev # type: ignore[list-item] ] - names = self.grouper.names + [self._selection_name] + names = self.grouper.names + [self.obj.name] if dropna: mask = codes[-1] != -1 @@ -838,7 +825,7 @@ def build_codes(lev_codes: np.ndarray) -> np.ndarray: if is_integer_dtype(out.dtype): out = ensure_int64(out) - return self.obj._constructor(out, index=mi, name=self._selection_name) + return self.obj._constructor(out, index=mi, name=self.obj.name) def count(self) -> Series: """ @@ -859,7 +846,7 @@ def count(self) -> Series: result = self.obj._constructor( out, index=self.grouper.result_index, - name=self._selection_name, + name=self.obj.name, dtype="int64", ) return self._reindex_output(result, fill_value=0) @@ -1026,7 +1013,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) if isinstance(sobj, Series): # GH#35246 test_groupby_as_index_select_column_sum_empty_df - result.columns = [self._selected_obj.name] + result.columns = [sobj.name] else: # select everything except for the last level, which is the one # containing the name of the function(s), see GH#32040 @@ -1042,6 +1029,16 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) agg = aggregate + @property + def _selection_name(self): + """ + Return a name for myself; + + This would ideally be called the 'name' property, + but we cannot conflict with the Series.name property which can be set. + """ + return self._selection + def _iterate_slices(self) -> Iterable[Series]: obj = self._selected_obj if self.axis == 1: @@ -1152,11 +1149,11 @@ def _wrap_applied_output(self, data, keys, values, not_indexed_same=False): # TODO: sure this is right? we used to do this # after raising AttributeError above return self.obj._constructor_sliced( - values, index=key_index, name=self._selection_name + values, index=key_index, name=self._selection ) elif not isinstance(first_not_none, Series): # values are not series or array-like but scalars - # self._selection_name not passed through to Series as the + # self._selection not passed through to Series as the # result should not take the name of original selection # of columns if self.as_index: From 0a1f2e474e0b52833881ec535657ceb85a67672c Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 10 May 2021 17:03:28 -0700 Subject: [PATCH 2/2] Remove _selection_name --- pandas/core/groupby/generic.py | 14 -------------- pandas/core/groupby/groupby.py | 5 +++-- 2 files changed, 3 insertions(+), 16 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index db3bc84e70695..4fff12d45af7d 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -168,10 +168,6 @@ class SeriesGroupBy(GroupBy[Series]): def _iterate_slices(self) -> Iterable[Series]: yield self._selected_obj - @property - def _selection_name(self) -> Hashable: - return self.obj.name - _agg_examples_doc = dedent( """ Examples @@ -1051,16 +1047,6 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) agg = aggregate - @property - def _selection_name(self): - """ - Return a name for myself; - - This would ideally be called the 'name' property, - but we cannot conflict with the Series.name property which can be set. - """ - return self._selection - def _iterate_slices(self) -> Iterable[Series]: obj = self._selected_obj if self.axis == 1: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 2510fcaa84f1c..2091d2fc484e1 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1052,9 +1052,10 @@ def reset_identity(values): values = reset_identity(values) result = concat(values, axis=self.axis) - if isinstance(result, Series) and self._selection_name is not None: + name = self.obj.name if self.obj.ndim == 1 else self._selection + if isinstance(result, Series) and name is not None: - result.name = self._selection_name + result.name = name return result